library(conos)
library(magrittr)
library(dplyr)
library(cacoa) # github.com/kharchenkolab/cacoa
## Warning: replacing previous import 'ape::where' by 'dplyr::where' when loading
## 'cacoa'
library(sccore)
library(scHelper) # github.com/rrydbirk/scHelper
library(qs)
library(ggplot2)
library(ggsci)
library(cowplot)
library(ggpubr)
library(STRINGdb)
library(reshape2)
library(ggforce)
library(ggpmisc)
library(RColorBrewer)
library(CRMetrics)
library(ggrepel)
library(circlize)
library(ComplexHeatmap)
library(stringr)
library(rstatix)
library(slingshot)
library(gamm4)
library(scITD)
library(ggraph)
## Comparisons
comp <- list(c("CTRL","MSA"),
c("CTRL","PD"),
c("MSA","PD"))
comp.long <- list(c("CTRL vs. MSA","CTRL vs. PD"),
c("CTRL vs. MSA","PD vs. MSA"),
c("CTRL vs. PD","PD vs. MSA"))
# Palettes
## Major celltypes
anno.neurons <- qread("anno_neurons.qs")
anno.neurons.major <- anno.neurons %>%
collapseAnnotation("GABA") %>%
collapseAnnotation("GLU") %>%
renameAnnotation("GABA", "Inh. neurons") %>%
renameAnnotation("GLU", "Exc. neurons") %>%
collapseAnnotation("MSN")
## Collapsing 11 labels containing 'GABA' in their name into one label.
## Collapsing 2 labels containing 'GLU' in their name into one label.
## Collapsing 3 labels containing 'MSN' in their name into one label.
anno <- qread("anno_major.qs")
anno.major <- anno[!names(anno) %in% names(anno.neurons.major) & !anno == "Neurons"] %>%
{factor(c(., anno.neurons.major))}
pal.major <- brewer.pal(n = 10, "Set1") %>%
c("lightblue3") %>%
setNames(levels(anno.major)[c(9,8,3,5,10,6:7,2,1,4)])
## Warning in brewer.pal(n = 10, "Set1"): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Condition
pal.major %<>% c(c("yellow", brewer.pal(n = 6, "Accent")[5:6]) %>% setNames(c("CTRL","PD","MSA")))
## Comparisons
pal.major %<>% c(brewer.pal(n = 10, "Paired")[c(6,8,10)] %>% setNames(c("CTRL vs. MSA","CTRL vs. PD","PD vs. MSA")))
## Neurons
pal.major %<>% c(setNames(c("navy",
"mediumblue",
"lightslateblue",
"lightskyblue",
"lightblue",
"lightseagreen",
"blue",
"cadetblue1",
"cyan",
"cyan4",
"aquamarine2",
"lightcoral",
"brown1",
"orange",
"darkorange4",
"lightgoldenrod3"),
levels(anno.neurons)))
## Glia
anno.glia <- c("anno_astro.qs",
"anno_oligo.qs",
"anno_opc.qs") %>%
lapply(qread) %>%
Reduce(c, .) %>%
factor() %>%
renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>%
renameAnnotation("Reactive_astrocytes", "AS_reactive") %>%
renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>%
renameAnnotation("Reactive_SCGZ", "OL_SGCZ")
pal.major %<>% c(setNames(c("pink",
pal.major["Astrocytes"],
pal.major["Oligodendrocytes"],
"chartreuse",
"darkolivegreen",
"brown4",
"coral"),
levels(anno.glia)))
## Microglia
anno.micro <- qread("anno_micro_pvm.qs") %>%
renameAnnotation("Steady-state","MIC_steady-state") %>%
renameAnnotation("Intermediate1","MIC_intermediate1") %>%
renameAnnotation("Intermediate2","MIC_intermediate2") %>%
renameAnnotation("Activated","MIC_activated")
pal.major %<>% c(setNames(c(pal.major["Microglia"],
"maroon4",
"magenta",
"pink3",
pal.major["PVMs"]),
levels(anno.micro)))
## Phago. assay
pal.major %<>% c(setNames(c("purple",
"black",
pal.major[c("CTRL","PD","MSA")]),
c("PBS",
"LPS",
"CTRL CSF",
"PD CSF",
"MSA CSF")))
## Houston assay
pal.major %<>% c(setNames(c(pal.major["PBS"],
pal.major["CTRL"],
"yellow3",
"orange",
pal.major["PD"],
"cyan4",
"navyblue",
pal.major["MSA"],
"pink3",
"red4"),
c("Microglia medium",
"CTRL CSF, no dil.",
"CTRL CSF, 1:2 dil.",
"CTRL CSF, 1:4 dil.",
"PD CSF, no dil.",
"PD CSF, 1:2 dil.",
"PD CSF, 1:4 dil.",
"MSA CSF, no dil.",
"MSA CSF, 1:2 dil.",
"MSA CSF, 1:4 dil.")))
# Load major Conos object
con <- qread("con_major.qs", nthreads = 10)
tt <- Sys.time()
Define helper functions
getOntWithFamily <- function(cao, comp, name = "GSEA") {
fams <- cao$test.results[[name]]$families
df.all <- list(cao$.__enclos_env__$private$getOntologyPvalueResults(name = name, genes = "up", p.adj = 1, q.value = 1),
cao$.__enclos_env__$private$getOntologyPvalueResults(name = name, genes = "down", p.adj = 1, q.value = 1)) %>%
bind_rows() %>%
mutate(logp = -log10(p.adjust),
comp = comp)
df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = name, subtype = "BP")
return(list(df.all = df.all,
df = df,
fams = fams))
}
lianaCircos <- function(df,
top.interactions = 30,
text.size = 1,
pal = pal.major,
cell.types = c("Astrocytes",
"Immune",
"Microglia",
"Neurons",
"OPCs",
"Oligodendrocytes",
"PVMs",
"Pericytes/endothelial"),
big.gap = 5,
small.gap = 2,
arrow.width = 3,
link.ramp.rel = T,
link.sort = F,
scale = F,
arrow.head.width = 0.3,
arrow.head.length = 0.3,
link.ramp.col = c("navy", "grey", "firebrick")) {
input_df <- df %>%
slice_max(order_by = score, n = top.interactions) %>%
mutate(target = paste0(target, " ")) %>%
mutate(source_lig = paste0(source, "|", ligand),
target_rec = paste0(target, "|", receptor))
if (link.ramp.rel) {
arr_wd <- rep(arrow.width, nrow(input_df))
} else {
arr_wd <- (((input_df$score - min(input_df$score))/(max(input_df$score) - min(input_df$score))) * (arrow.width)) + 1
}
# Colors and segments
anno.col <- setNames(pal,
cell.types) %>%
c(., "Oligodendrocytes " = unname(.["Oligodendrocytes"]))
cell_cols <- anno.col[unique(c(unique(input_df$source), unique(input_df$target), "Oligodendrocytes "))]
link_cols <- c()
if (!link.ramp.rel) {
for (i in input_df$source_lig) {
link_cols <- c(link_cols, cell_cols[str_extract(i,
"[^|]+")])
}
} else {
input_df %<>%
arrange(score)
df.down <- input_df %>% filter(score <= 0)
link_down <- colorRampPalette(c(link.ramp.col[1], link.ramp.col[2]))(nrow(df.down))
df.up <- input_df %>% filter(score > 0)
link_up <- colorRampPalette(c(link.ramp.col[2], link.ramp.col[3]))(nrow(df.up))
link_cols <- c(link_down, link_up)
}
segments <- unique(c(paste0(input_df$source, "|", input_df$ligand),
paste0(input_df$target, "|", input_df$receptor)))
grp <- str_extract(segments, "[^|]+") %>%
setNames(segments)
# Redo colors
cell_cols2 <- grp
for (i in unique(grp)) {
cell_cols2[cell_cols2 == i] <- cell_cols[i]
}
# Plot
input_df %>%
select(source_lig, target_rec, score) %>%
chordDiagram(directional = 1,
group = grp,
scale = scale,
diffHeight = 0.005,
direction.type = c("arrows"),
link.arr.type = "triangle",
annotationTrack = c(),
preAllocateTracks = list(
list(track.height = 0.05),
list(track.height = 0.25),
list(track.height = 0.05)),
big.gap = big.gap,
transparency = 1,
link.arr.lwd = arr_wd,
link.arr.col = link_cols,
link.arr.length = arrow.head.length,
link.arr.width = arrow.head.width,
small.gap = small.gap
)
circos.track(track.index = 2, panel.fun = function(x, y) {
circos.text(CELL_META$xcenter,
CELL_META$ylim[1],
str_extract(CELL_META$sector.index, "[^|]+$"),
facing = "clockwise",
niceFacing = TRUE,
adj = c(0, 0.55),
cex = 1)
}, bg.border = NA)
# Split segments
for (l in segments) {
highlight.sector(l, track.index = 3, col = cell_cols2[l])
}
# Add ligand/receptor track
## Ligand
highlight.sector(input_df$source_lig,
track.index = 1,
col = "black",
text = "Ligands",
cex = 1,
text.col = "white",
niceFacing = TRUE)
## Receptor
highlight.sector(input_df$target_rec,
track.index = 1,
col = "white",
text = "Receptors",
cex = 1,
text.col = "black",
border = "black",
niceFacing = TRUE)
# Legends
minmax <- input_df %>%
pull(score) %>%
{pmax(abs(min(.)), max(.))} %>%
formatC(digits = 1) %>%
as.numeric()
col.range = c(-minmax, 0, minmax)
lgd_links = Legend(at = col.range,
col_fun = colorRamp2(col.range, link.ramp.col),
title_position = "topleft",
title = "Links")
lgd_ct <- Legend(labels = unique(c(input_df$source, input_df$target)),
title = "Cell type",
type = "points",
legend_gp = gpar(col = "transparent"),
background = cell_cols[unique(c(input_df$source, input_df$target))])
lgd_list_vertical = packLegend(lgd_ct, lgd_links)
draw(lgd_list_vertical,
just = c("left", "bottom"),
x = unit(5, "mm"),
y = unit(5, "mm"))
circos.clear()
}
getTscanTrajectory <- function(con, anno) {
requireNamespace("TSCAN", quietly = T)
emb <- con$embedding[names(anno), ]
anno %<>% .[rownames(emb)]
cent.ids <- emb %>%
rownames() %>%
split(anno)
centroids <- cent.ids %>%
lapply(\(cid) emb[cid, ]) %>%
lapply(colMeans) %>%
bind_rows() %>%
t() %>%
`colnames<-`(c("UMAP1","UMAP2"))
mst <- centroids %>%
TSCAN::createClusterMST(clusters = NULL)
line.data <- TSCAN::reportEdges(centroids, mst = mst, clusters = NULL)
return(line.data)
}
plotGenePseudoBulk <- function(gene, cm.pseudo, legend = T) {
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(condition = strsplit(id, "_|!!") %>% sget(1),
ct = strsplit(id, "!!") %>% sget(2)) %>%
mutate(ord = order(condition, ct))
x <- cm.pseudo %>%
.[match(gene, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
.[idx$ord]
plot.dat <- x %>%
{data.frame(sample = names(.),
value = unname(.))} %>%
mutate(anno = strsplit(sample, "!!") %>%
sget(2),
condition = strsplit(sample, "!!|_") %>%
sget(1)) %>%
mutate(anno = factor(anno))
stat.test <- plot.dat %>%
group_by(anno) %>%
rstatix::wilcox_test(value ~ condition) %>%
rstatix::add_xy_position(x = "anno", step.increase = 0.1)
p <- plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = condition)) +
theme_bw() +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
axis.ticks.x = element_line()) +
labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = paste0(gene, " expression")) +
scale_fill_manual(values = ggsci::pal_jama()(3)) +
stat_compare_means(aes(fill = condition), method = "kruskal", label = "p.signif") +
stat_pvalue_manual(stat.test)
if (!legend) p <- p + guides(fill = "none")
return(p)
}
cao_msa <- qread("cao_major_msa.qs", nthreads = 10)
cao_pd <- qread("cao_major_pd.qs", nthreads = 10)
cao_dis <- qread("cao_major_dis.qs", nthreads = 10)
con$plotGraph(groups = anno.major,
embedding = "UMAP_1_0.001_5",
size = 0.1,
palette = pal.major,
font.size = 3,
raster = T,
show.labels = T,
plot.na = F,
show.legend = T,
legend.title = "Cell type",
alpha = 0.05) +
dotSize(3) +
labs(x="UMAP1", y= "UMAP2") +
theme(line = element_blank()) +
ylim(c(-20, 15)) +
xlim(c(-21, 11))
cm.merged <- con$getJointCountMatrix()
markers <- c("AQP4","PTPRC","CSF1R","RBFOX3","SLC17A7","GAD1","PPP1R1B","MOG","VCAN","PDGFRB","MRC1")
dotPlot(markers,
cm.merged,
anno.major %>%
factor(., levels = sort(levels(.))[c(1,3,5,2,4,6,7:10)]),
cols = c("white","firebrick"),
gene.order = markers)
spc <- con$getDatasetPerCell()
cpc <- spc %>%
as.character() %>%
grepl.replace(c("CTRL","MSA","PD")) %>%
`names<-`(names(spc))
con$plotGraph(groups = cpc,
embedding = "UMAP_1_0.001_5",
size = 0.1,
alpha = 0.05,
palette = pal.major,
font.size = 3,
raster = F,
show.labels = T,
plot.na = F,
mark.groups = F,
show.legend = T) +
labs(x="UMAP1", y= "UMAP2", col = "") +
theme(legend.position = "bottom",
line = element_blank()) +
dotSize(3)
cao_msa$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nMSA")) +
labs(x = "", y = "")
cao_pd$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nPD")) +
geom_vline(xintercept = 7.5, col = "red") +
labs(x = "", y = "")
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
cao_dis$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nPD",0,"1\nMSA")) +
geom_vline(xintercept = 5.5, col = "red") +
labs(x = "", y = "")
cao_msa$cell.groups.palette <- pal.major
cao_msa$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
cao_pd$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
cao_pd$cell.groups.palette <- pal.major
cao_dis$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
cao_dis$cell.groups.palette <- pal.major
cao <- qread("cao_neurons.qs", nthreads = 10)
cao_msa <- qread("cao_neurons_msa.qs", nthreads = 10)
cao_pd <- qread("cao_neurons_pd.qs", nthreads = 10)
cao_dis <- qread("cao_neurons_dis.qs", nthreads = 10)
cao$plotEmbedding(groups = cao$cell.groups,
size = 0.1,
palette = pal.major,
font.size = 3,
raster = T,
show.labels = T,
plot.na = F,
alpha = 0.1) +
theme(line = element_blank()) +
labs(x="largeVis1", y= "largeVis2")
cm.merged <- cao$data.object$getJointCountMatrix()
markers <- c("GAD1","GAD2","PPP1R1B","SLC17A7","CHAT","CALB2","DCC","ID2","MEIS2","ST18","NKX2-1","NR2F2","PAX6","PVALB","RXFP1","SST","VIP","RELN","SATB2","DRD1","DRD2","FOXP2", "LRP8", "VLDLR")
dotPlot(markers,
cm.merged,
anno.neurons,
cols = c("white","firebrick"),
gene.order = markers)
cao_msa$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nMSA")) +
labs(x = "", y = "")
cao_pd$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nPD")) +
geom_vline(xintercept = 13.5, col = "red") +
labs(x = "", y = "")
cao_dis$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nPD",0,"1\nMSA")) +
labs(x = "", y = "")
cao_msa$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
cao_pd$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
cao_dis$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
sample.groups <- cao$sample.groups
cao$estimateCellDensity(method = "graph")
cao$estimateDiffCellDensity(type = "subtract")
cao$plotCellDensity(show.cell.groups = F, show.legend = F, color.range = c(0, 0.00035))$CTRL +
geom_circle(aes(x0 = 30.9, y0 = 10, r = 10)) +
geom_circle(aes(x0 = 17.5, y0 = 24, r = 4), col = "cyan3") +
theme(line = element_blank())
# MSA
cao$sample.groups <- sample.groups %>%
names() %>%
grepl.replace(c("CTRL","MSA","PD")) %>%
`names<-`(sample.groups %>% names()) %>%
.[. %in% c("CTRL","MSA")]
cao$target.level <- "MSA"
cao$estimateCellDensity(method = "graph", name = "msa.density")
cao$estimateDiffCellDensity(type = "subtract", name = "msa.density")
cao$plotCellDensity(show.cell.groups = F, name = "msa.density", show.legend = F, color.range = c(0, 0.00035))$MSA +
geom_circle(aes(x0 = 30.9, y0 = 10, r = 10)) +
geom_circle(aes(x0 = 17.5, y0 = 24, r = 4), col = "cyan3") +
theme(line = element_blank())
# PD
cao$sample.groups <- sample.groups %>%
names() %>%
grepl.replace(c("CTRL","MSA","PD")) %>%
`names<-`(sample.groups %>% names()) %>%
.[. %in% c("CTRL","PD")]
cao$target.level <- "PD"
cao$estimateCellDensity(method = "graph", name = "pd.density")
cao$estimateDiffCellDensity(type = "subtract", name = "pd.density")
cao$plotCellDensity(show.cell.groups = F, name = "pd.density", show.legend = T, color.range = c(0, 0.00035))$PD +
geom_circle(aes(x0 = 30.9, y0 = 10, r = 10)) +
geom_circle(aes(x0 = 17.5, y0 = 24, r = 4), col = "cyan3") +
theme(line = element_blank())
con.glia <- qread("con_oligo_astro_opc.qs", nthreads = 10)
cao_msa <- qread("cao_oligo_astro_opc_msa.qs", nthreads = 10)
cao_pd <- qread("cao_oligo_astro_opc_pd.qs", nthreads = 10)
cao_dis <- qread("cao_oligo_astro_opc_dis.qs", nthreads = 10)
con.glia$plotGraph(groups = anno.glia,
plot.na = F,
size = 0.1,
palette = pal.major,
font.size = 3,
raster = T,
show.labels = T, embedding = "largeVis_CPCA_AS01",
alpha = 0.05) +
labs(x = "largeVis1", y = "largeVis2") +
theme(line = element_blank())
cm.merged <- con.glia$getJointCountMatrix()
markers <- c("AQP4","TNC","MOG","LINC01608","SLC5A11","SGCZ","VCAN","OLIG2")
dotPlot(markers,
cm.merged,
anno.glia,
cols = c("white","firebrick"),
gene.order = markers)
cao_msa$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nMSA")) +
geom_vline(xintercept = 6.5, col = "red") +
labs(x = "", y = "")
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
cao_pd$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nPD")) +
geom_vline(xintercept = 4.5, col = "red") +
labs(x = "", y = "")
## Warning: Removed 59 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
cao_dis$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
line = element_blank()) +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nPD",0,"1\nMSA")) +
geom_vline(xintercept = 6.5, col = "red") +
labs(x = "", y = "")
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
cao_msa$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
cao_pd$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
cao_dis$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 10914983 583.0 24378312 1302.0 24378312 1302.0
## Vcells 3017281295 23020.1 7084111497 54047.5 5888628205 44926.7
cao_msa <- qread("cao_astro_msa.qs", nthreads = 10)
cao_pd <- qread("cao_astro_pd.qs", nthreads = 10)
cao_dis <- qread("cao_astro_dis.qs", nthreads = 10)
df.all <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>%
lget("df.all") %>%
bind_rows() %>%
mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower()))
## Loading required namespace: DOSE
##
# Select relevant pathways
p.ids <- c("GO:0050821", "GO:0050728", "GO:0006986", "GO:0030168", "GO:0030001", "GO:1904707", "GO:0140053", "GO:0006486", "GO:0010717", "GO:0031113", "GO:0032675", "GO:0061041", "GO:0071222", "GO:0045766", "GO:0006487", "GO:0097501", "GO:0042776", "GO:0045047", "GO:0044794", "GO:0035633", "GO:0097242", "GO:0006120", "GO:0042026", "GO:0006979", "GO:0071222", "GO:0017015", "GO:0009611", "GO:1901201", "GO:0042594", "GO:0097501", "GO:0006123", "GO:0006909", "GO:0007179", "GO:0006120", "GO:0097501")
ct <- "AS_homeostatic"
df.sel <- df.all %>%
filter(Group == ct)
p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
theme_bw() +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
scale_color_manual(values = pal.major) +
theme(line = element_blank()) +
labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
dotSize(3)
df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>%
lget("df") %>%
bind_rows() %>%
mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower())) %>%
filter(Group == ct,
ID %in% p.ids)
p2 <- df %>%
filter(p.adjust <= 0.05, NES > 0) %>%
mutate(Group = factor(Group, levels = sort(unique(Group)))) %>%
select(Description, NES, Group, comp) %>%
as.data.frame() %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for up-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
p3 <- df %>%
filter(p.adjust <= 0.05, NES < 0) %>%
select(Description, NES, Group, comp) %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for down-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))
ct <- "AS_reactive"
df.sel <- df.all %>%
filter(Group == ct)
p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
theme_bw() +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
scale_color_manual(values = pal.major) +
theme(line = element_blank()) +
labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
dotSize(3)
df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>%
lget("df") %>%
bind_rows() %>%
mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower())) %>%
filter(Group == ct,
ID %in% p.ids)
p2 <- df %>%
filter(p.adjust <= 0.05, NES > 0) %>%
mutate(Group = factor(Group, levels = sort(unique(Group)))) %>%
select(Description, NES, Group, comp) %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for up-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
p3 <- df %>%
filter(p.adjust <= 0.05, NES < 0) %>%
select(Description, NES, Group, comp) %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for down-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))
cao_msa <- qread("cao_oligo_msa.qs", nthreads = 10)
cao_pd <- qread("cao_oligo_pd.qs", nthreads = 10)
cao_dis <- qread("cao_oligo_dis.qs", nthreads = 10)
df.all <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>%
lget("df.all") %>%
bind_rows() %>%
mutate(Group = paste0("OL_", strsplit(Group, "_") %>% sget(2) %>% gsub("SCGZ", "SGCZ", .)))
# Select relevant pathways
p.ids <- c("GO:0071345", "GO:0045596", "GO:0060271", "GO:0048709", "GO:0006986", "GO:0016032", "GO:0060271", "GO:0042026", "GO:0006986", "GO:0120034", "GO:0071346", "GO:1904262", "GO:0042776", "GO:0061077", "GO:0050821", "GO:0045047", "GO:0042776", "GO:0050821", "GO:0045047", "GO:0007042", "GO:0042776", "GO:0061077", "GO:0045047", "GO:0042776", "GO:0045047", "GO:2000249", "GO:0002474", "GO:0042026", "GO:0002040", "GO:0046330", "GO:0043542", "GO:0030036", "GO:0032981", "GO:0007042")
ct <- "OL_SLC5A11"
df.sel <- df.all %>%
filter(Group == ct)
p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
theme_bw() +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
scale_color_manual(values = pal.major) +
theme(line = element_blank()) +
labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
dotSize(3)
df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>%
lget("df") %>%
bind_rows() %>%
mutate(Group = paste0("OL_", strsplit(Group, "_") %>% sget(2) %>% gsub("SCGZ", "SGCZ", .))) %>%
filter(Group == ct,
ID %in% p.ids)
p2 <- df %>%
filter(p.adjust <= 0.05, NES > 0) %>%
mutate(Group = factor(Group, levels = sort(unique(Group)))) %>%
select(Description, NES, Group, comp) %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for up-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
p3 <- df %>%
filter(p.adjust <= 0.05, NES < 0) %>%
select(Description, NES, Group, comp) %>%
group_by(comp, Group) %>%
group_split() %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = comp)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for down-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))
cm.merged <- con$getJointCountMatrix(raw = T) %>%
Matrix::t() %>%
.[, colnames(.) %in% names(anno.glia)]
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.glia %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(),
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo %<>%
{. + 1} %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# For DE between visits
genes <- "TLR1"
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(condition = strsplit(id, "_|!!") %>% sget(1),
ct = strsplit(id, "!!") %>% sget(2)) %>%
mutate(ord = order(condition, ct))
x <- cm.pseudo %>%
.[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
.[idx$ord]
plot.dat <- x %>%
{data.frame(sample = names(.),
value = unname(.))} %>%
mutate(anno = strsplit(sample, "!!") %>%
sget(2),
condition = strsplit(sample, "!!|_") %>%
sget(1)) %>%
filter(anno %in% c("OL_LINC01608", "OL_SGCZ", "OL_SLC5A11")) %>%
mutate(anno = factor(anno))
stat.test <- plot.dat %>%
group_by(anno) %>%
rstatix::wilcox_test(value ~ condition) %>%
rstatix::add_xy_position(x = "anno", step.increase = 0.1, fun = "mean_sd", scales = "free")
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = condition)) +
theme_bw() +
theme(line = element_blank(),
axis.ticks.x = element_line(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = "TLR1 expression") +
scale_fill_manual(values = ggsci::pal_jama()(3)) +
stat_compare_means(aes(fill = condition), method = "kruskal", label = "p.signif", label.y = 8.5e2) +
stat_pvalue_manual(stat.test)
## Warning in stat_compare_means(aes(fill = condition), method = "kruskal", :
## Ignoring unknown aesthetics: fill
# For DE between visits
genes <- "LRP1"
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(condition = strsplit(id, "_|!!") %>% sget(1),
ct = strsplit(id, "!!") %>% sget(2)) %>%
mutate(ord = order(condition, ct))
x <- cm.pseudo %>%
.[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
.[idx$ord]
plot.dat <- x %>%
{data.frame(sample = names(.),
value = unname(.))} %>%
mutate(anno = strsplit(sample, "!!") %>%
sget(2),
condition = strsplit(sample, "!!|_") %>%
sget(1)) %>%
filter(anno %in% c("OL_LINC01608", "OL_SGCZ", "OL_SLC5A11")) %>%
mutate(anno = factor(anno))
stat.test <- plot.dat %>%
group_by(anno) %>%
rstatix::wilcox_test(value ~ condition) %>%
rstatix::add_xy_position(x = "anno", step.increase = 0.1)
plot.dat %>%
ggplot(aes(anno, value)) +
geom_boxplot(aes(fill = condition)) +
theme_bw() +
theme(line = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
axis.ticks.x = element_line()) +
labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = "LRP1 expression") +
scale_fill_manual(values = ggsci::pal_jama()(3)) +
stat_compare_means(aes(fill = condition), method = "kruskal", label = "p.signif", label.y = 175) +
stat_pvalue_manual(stat.test) +
ylim(c(0, 180))
## Warning in stat_compare_means(aes(fill = condition), method = "kruskal", :
## Ignoring unknown aesthetics: fill
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bracket()`).
For calculation of data, see Liana.ipynb.
dat <- read.delim("liana_res.csv",
sep = ",",
header = T) %>%
mutate(group = strsplit(sample, "_") %>%
sget(1))
dat.plot <- dat %>%
dplyr::rename(ligand = ligand_complex,
receptor = receptor_complex,
score = lrscore) %>%
filter(target == "Oligodendrocytes",
ligand %in% c("BMP1", "SERPINE2", "PSAP", "APOE", "APP", "SERPING1"),
receptor %in% c("LRP1", "BMPR1A", "LRP4")) %>%
group_by(group, ligand, receptor, source, target) %>%
summarize(score = mean(score)) %>%
ungroup() %>%
arrange(group, source, target, ligand, receptor)
## `summarise()` has grouped output by 'group', 'ligand', 'receptor', 'source'.
## You can override using the `.groups` argument.
dat.msa <- dat.plot %>%
filter(group == "MSA",
score > 0.39) %>%
mutate(lrst = paste0(ligand, receptor, source, target))
dat.pd <- dat.plot %>%
mutate(lrst = paste0(ligand, receptor, source, target)) %>%
filter(group == "PD",
lrst %in% dat.msa$lrst)
dat.rel <- dat.msa %>%
mutate(score = score - dat.pd$score)
dat.rel %>%
lianaCircos()
cao <- qread("cao_micro_pvm.qs", nthreads = 10)
cao_msa <- qread("cao_micro_pvm_msa.qs", nthreads = 10)
cao_pd <- qread("cao_micro_pvm_pd.qs", nthreads = 10)
cao_dis <- qread("cao_micro_pvm_dis.qs", nthreads = 10)
sample.groups <- cao$sample.groups
cao$plotEmbedding(groups = anno.micro,
size = 0.5,
palette = pal.major,
font.size = 3,
raster = T,
mark.groups = T,
plot.na = F,
alpha = 0.2) +
labs(x="UMAP1", y= "UMAP2") +
theme(line = element_blank())
cm.merged <- cao$data.object$getJointCountMatrix()
markers <- c("AIF1","F13A1","MRC1","CD163","CD74")
dotPlot(markers,
cm.merged,
cao$cell.groups,
cols = c("white","firebrick"))
anno.sort <- anno.micro[rownames(cao$data.object$embedding)]
anno.sel <- anno.sort[!anno.sort %in% c("MIC_intermediate1", "PVMs")] %>% factor()
anno.sel2 <- anno.sort[!anno.sort %in% c("MIC_intermediate2", "MIC_activated")] %>% factor()
ldata1 <- getTscanTrajectory(cao$data.object, anno.sel)
ldata2 <- getTscanTrajectory(cao$data.object, anno.sel2)
ldata <- rbind(ldata1, ldata2)
cao$data.object$embedding %>%
as.data.frame() %>%
mutate(., unname(anno.micro[rownames(.)])) %>%
setNames(c("UMAP1", "UMAP2", "annotation")) %>%
ggplot() +
geom_point(aes(UMAP1, UMAP2, col = annotation), size = 0.3) +
geom_line(data = ldata, mapping=aes(UMAP1, UMAP2, group = edge), linewidth = 1) +
theme_bw() +
theme(legend.position = "right",
line = element_blank()) +
labs(col = "", x = "UMAP1", y = "UMAP2") +
scale_colour_manual(values = pal.major) +
dotSize(3)
Prepare data
emb <- cao$data.object$embedding %>%
`colnames<-`(c("UMAP1","UMAP2")) %>%
.[rownames(.) %in% names(anno.sel2), ]
sds_obj <- slingshot(emb,
anno.sel2,
start.clus = "MIC_steady-state",
stretch = 0
)
sds <- as.SlingshotDataSet(sds_obj)
pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1]
Here, we show the code for running the generalized additive mixed
model. It takes a significant amount of time to run, we provide data in
pseudotime_micro_l2.qs.
mat <- cao$data.object$getJointCountMatrix() %>%
.[match(names(sds_obj), rownames(.)), colSums(.) > 2e2] %>%
.[, !grepl(pattern = "RPL|RPS|MT-", colnames(.))]
mt <- mat %>%
as.matrix() %>%
as.data.frame() %>%
tibble::rownames_to_column() %>%
{message("Mutating"); mutate(.,
pseudotime = pseudotime[rowname],
condition = cao$data.object$getDatasetPerCell()[rowname] %>%
as.character() %>%
strsplit("_") %>%
sget(1),
sample = strsplit(rowname, "!!") %>%
sget(1))} %>%
.[complete.cases(.), ] %>%
{message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "condition", "sample"))} %$%
{message("Splitting"); split(., variable)}
res <- mt %>%
sccore::plapply(\(x) {
fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(condition)), random = ~(1 | sample), REML = F)
fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | sample), REML = F)
fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | sample), REML = F)
ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
residuals <- predict(fit_full$gam, se.fit = T)$fit %>%
unname()
r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>%
setNames(c("full", "reduced"))
out <- list(anova = ann,
residuals = residuals,
r.sq = r.sq)
return(out)
}
}, n.cores = 40, mc.preschedule = T, mc.cleanup = T, progress = F) %>%
.[!sapply(., is.null)]
qsave(res, "pseudotime_micro_l2.qs")
cao$data.object$embedding %>%
as.data.frame() %>%
mutate(., unname(anno.micro[rownames(.)])) %>%
setNames(c("UMAP1", "UMAP2", "annotation")) %>%
mutate(., pseudotime = pseudotime[rownames(.)]) %>%
ggplot() +
geom_point(aes(UMAP1, UMAP2, col = pseudotime), size = 0.3) +
geom_line(data = ldata2, aes(UMAP1, UMAP2, group = edge), size = 1) +
theme_bw() +
theme(line = element_blank()) +
scale_color_gradient(low = "navyblue", high = "orange") +
labs(col = "Pseudotime", x = "UMAP1", y = "UMAP2")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Please note, row order may change per iteration.
res <- qread("pseudotime_micro_l2.qs")
rsq.pseudo <- res %>%
sapply(\(gene) gene$r.sq[1]) %>%
sort(decreasing = T)
res.filter <- res[names(rsq.pseudo)[seq(50)] %>% strsplit(".", fixed = T) %>% sget(1)]
cm.merged <- cao$data.object$getJointCountMatrix() %>%
.[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)] %>%
.[rowSums(.) > 0, ]
## Predict smoothend expression
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7
scFit <- cm.merged %>%
Matrix::t() %>%
tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)
Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)
# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))
# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]
# Create heatmap
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))
Heatmap(Smooth,
col = col_fun,
cluster_columns=F,
cluster_rows=F,
show_column_names = F,
row_names_gp = grid::gpar(fontsize = 5))
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 13219335 706 30836541 1646.9 30836541 1646.9
## Vcells 1571545387 11990 5440648830 41508.9 8457158301 64523.0
We provide this figure here since all data are already loaded.
cpc <- pseudotime %>%
names() %>%
strsplit("_") %>%
sget(1) %>%
factor()
res[rownames(Smooth)] %>%
lget("residuals") %>%
lapply(as.data.frame) %>%
lapply(setNames, "residuals") %>%
lapply(mutate, group = cpc, pseudotime = pseudotime) %>%
data.table::rbindlist(idcol = "gene") %>%
ggplot(aes(pseudotime, residuals, col = group)) +
geom_smooth() +
theme_bw() +
facet_wrap(~gene, ncol = 5, scales = "free")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
cao_msa$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none") +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nMSA")) +
geom_vline(xintercept = 4.5, col = "red") +
labs(x = "", y = "") +
theme(line = element_blank())
cao_pd$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none") +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nCTRL",0,"1\nPD")) +
geom_vline(xintercept = 4.5, col = "red") +
labs(x = "", y = "") +
theme(line = element_blank())
## Warning: Removed 273 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
cao_dis$plotCellLoadings(show.pvals = F,
alpha = 0)$data %>%
ggplot(aes(ind, values, fill = ind)) +
geom_hline(yintercept = 0, col = "black") +
geom_violin() +
coord_flip() +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none") +
scale_y_continuous(breaks = c(-1,0,1),
limits = c(-1,1),
labels = c("-1\nPD",0,"1\nMSA")) +
geom_vline(xintercept = 3.5, col = "red") +
labs(x = "", y = "") +
theme(line = element_blank())
## Warning: Removed 311 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 12785520 682.9 30836541 1646.9 30836541 1646.9
## Vcells 1426358775 10882.3 4352519064 33207.1 8457158301 64523.0
We provide combined data from the RNAscope experiments as a single
file res.qs.
res <- qread("RNAscope.qs") %>%
filter(target == "AIF1", Ch1NumSpots > 0) %>%
arrange(file)
area <- read.table("RNAscope_areas.tsv", sep = "\t", header = T) %>% # Million pixels
mutate(file = paste(File.name, Tissue, sep = "")) %>%
filter(file %in% res$file) %>%
arrange(file) %>%
mutate(rel_px = px.2 / 1E6)
p1 <- res %>%
group_by(group, target, file) %>%
summarize(spots = mean(Ch1NumSpots)) %>%
ggplot(aes(group, spots, fill = group)) +
geom_boxplot() +
geom_jitter(width = 0.2) +
theme_bw() +
labs(y = "Mean no. spots per double-positive cell", x = "") +
stat_compare_means(method = "kruskal.test", label.y = 115) +
stat_compare_means(comparisons = comp, method = "wilcox.test") +
theme(line = element_blank()) +
guides(fill = "none") +
scale_fill_manual(values = pal.major)
## `summarise()` has grouped output by 'group', 'target'. You can override using
## the `.groups` argument.
p2 <- res %>%
group_by(group, target, file) %>%
summarize(no_cells = n()) %>%
as.data.frame() %>%
arrange(file) %>%
mutate(rel_cells = no_cells / area$rel_px) %>%
ggplot(aes(group, rel_cells, fill = group)) +
geom_boxplot(outliers = F) +
geom_jitter(width = 0.2) +
theme_bw() +
stat_compare_means(method = "kruskal.test", label.y = 17) +
stat_compare_means(comparisons = comp, method = "wilcox.test") +
labs(y = "No. double-positive cells\nNormalized to area", x = "") +
theme(line = element_blank()) +
guides(fill = "none") +
scale_fill_manual(values = pal.major)
## `summarise()` has grouped output by 'group', 'target'. You can override using
## the `.groups` argument.
plot_grid(plotlist = list(p1, p2), ncol = 1)
fams <- cao_msa$test.results[["GSEA"]]$families
df.all <- list(cao_msa$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "up", p.adj = 1, q.value = 1),
cao_msa$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "down", p.adj = 1, q.value = 1)) %>%
bind_rows() %>%
mutate(logp = -log10(p.adjust))
# Relevant pathways
p.ids <- c("GO:0060760", "GO:0002230", "GO:0019915", "GO:1904646", "GO:0042775", "GO:0019646", "GO:0033108", "GO:0035455", "GO:0061077", "GO:0006914", "GO:0042026", "GO:0034340", "GO:0051607", "GO:0060337", "GO:0035455", "GO:0002684", "GO:0036037", "GO:0048514")
df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = "GSEA", subtype = "BP") %>%
mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .)) %>%
filter(ID %in% p.ids)
df.all %<>% mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .))
p1 <- ggplot(df.all, aes(NES, logp, col = Group, shape = Group)) +
geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.5) +
theme_bw() +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
scale_color_manual(values = pal.major) +
theme(line = element_blank()) +
labs(y = "-log10(adj. p)") +
dotSize(3)
p2 <- df %>%
filter(p.adjust <= 0.05, NES > 0) %>%
mutate(Group = factor(Group, levels = sort(unique(Group)))) %>%
select(Description, NES, Group) %$%
split(., Group) %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
dplyr::slice(seq(25)) %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = Group)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for up-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
p3 <- df %>%
filter(p.adjust <= 0.05, NES < 0) %>%
select(Description, NES, Group) %>%
dplyr::slice(seq(20)) %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = Group)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for down-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 12258404 654.7 30836541 1646.9 30836541 1646.9
## Vcells 1282521939 9784.9 3482015252 26565.7 8457158301 64523.0
fams <- cao_pd$test.results[["GSEA"]]$families
df.all <- list(cao_pd$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "up", p.adj = 1, q.value = 1),
cao_pd$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "down", p.adj = 1, q.value = 1)) %>%
bind_rows() %>%
mutate(logp = -log10(p.adjust))
# Select relevant pathways
p.ids <- c("GO:0042776", "GO:0034341", "GO:0001916", "GO:0002503", "GO:0019885", "GO:0016042", "GO:0001909", "GO:0051085", "GO:0045824", "GO:0044000", "GO:0019885", "GO:0044242", "GO:0001944", "GO:0034976", "GO:0019646", "GO:0009205", "GO:0019883", "GO:0070972", "GO:0002474")
df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = "GSEA", subtype = "BP") %>%
mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .)) %>%
filter(ID %in% p.ids)
df.all %<>% mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .))
p1 <- ggplot(df.all, aes(NES, logp, col = Group, shape = Group)) +
geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.5) +
theme_bw() +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
scale_color_manual(values = pal.major) +
theme(line = element_blank()) +
labs(y = "-log10(adj. p)") +
dotSize(3)
p2 <- df %>%
filter(p.adjust <= 0.05, NES > 0) %>%
select(Description, NES, Group) %$%
split(., Group) %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
dplyr::slice(seq(25)) %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = Group)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for up-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
p3 <- df %>%
filter(p.adjust <= 0.05, NES < 0) %>%
select(Description, NES, Group) %$%
split(., Group) %>%
lapply(dplyr::slice, seq(5)) %>%
bind_rows() %>%
dplyr::slice(seq(25)) %>%
mutate(., x = 0, y = rev(seq(nrow(.)))) %>%
ggplot(aes(x, y, label = Description, col = Group)) +
geom_text(hjust = 0) +
theme_void() +
xlim(0, 1) +
labs(title = "Selected terms for down-regulated genes") +
scale_color_manual(values = pal.major) +
guides(col = "none")
plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 11637292 621.5 30836541 1646.9 30836541 1646.9
## Vcells 1080375935 8242.7 3482015252 26565.7 8457158301 64523.0
Figures 6c,e,f where made with GraphPad Prism. Data are available in
Phagocytosis_assay_triplicates_raw_data_HH.tsv (Copenhagen
experiment) or HOUSTON.tsv (Houston experiment).
dat <- read.delim("Phagocytosis_assay_triplicates_raw_data_HH.tsv",
header = T, dec = ",") %>%
tidyr::pivot_longer(names_to = "group", cols = -1, values_to = "value") %>%
mutate(group = gsub(".1|.2", "", group) %>% factor(labels = c("PBS","LPS","MSA CSF","CTRL CSF","PD CSF"))) %>%
mutate(group = factor(group, levels = c("PBS","LPS","CTRL CSF","MSA CSF","PD CSF"))) %>%
group_by(Time, group) %>%
summarize(mean = mean(value, na.rm = T),
sd = sd(value, na.rm = T))
## `summarise()` has grouped output by 'Time'. You can override using the
## `.groups` argument.
dat %>%
ggplot(aes(Time, mean, group=group, color=group)) +
geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1) +
geom_point(size = 0.5) +
theme_bw() +
labs(x = "Time (hours)", y = "pHrodo-labelled E. coli uptake - intensity ratio [AU]", col = "Stimulation") +
scale_color_manual(values = pal.major) +
theme(line = element_blank())
dat.raw <- read.table("Cytokine_summary.tsv", header = TRUE, sep = "\t", dec = ",")
dat.tmp <- dat.raw %>%
melt(id.vars = c("Sample","Group","Condition")) %>%
mutate(variable = variable %>%
as.character() %>%
gsub(".", "-", ., fixed = T) %>%
as.factor()) %>%
filter(Condition == "Media", !Group %in% c("PBS","LPS")) %>%
filter(Condition == "Media") %>%
mutate(Group = Group %>% factor(levels = c("CTRL","MSA","PD")),
variable = variable %>% factor())
dat.stat <- dat.raw %>%
select(!IL.8) %>%
mutate(Group = factor(Group, levels = c("PBS","LPS","CTRL","MSA","PD"))) %>%
filter(!Group %in% c("PBS","LPS"))
res.il10 <- FSA::dunnTest(IL10 ~ Group, data = dat.stat %>% {`colnames<-`(., gsub(".", "", colnames(.), fixed = T))}, method = "bh")
## Registered S3 methods overwritten by 'FSA':
## method from
## confint.boot car
## hist.boot car
stat.test <- dat.tmp %>%
filter(variable == "IL-10") %>%
select(Group, value) %>%
as.data.frame() %>%
rstatix::wilcox_test(value ~ Group) %>%
mutate(p = res.il10$res$P.unadj,
p.adj = res.il10$res$P.adj %>% formatC(digits = 2),
p.adj.signif = c("*","ns","ns")) %>%
rstatix::add_xy_position(x = "Group")
dat.tmp %>%
filter(variable == "IL-10") %>%
ggplot(aes(Group, value)) +
geom_point(aes(col = Group), size = 3, position = position_jitter(width = 0.3)) +
theme_bw() +
theme(line = element_blank()) +
stat_pvalue_manual(stat.test, label = "p.adj", tip.length = 0.01, backet.nudge.y = 3, hide.ns = F) +
scale_color_manual(values = pal.major) +
labs(x = "", y = "IL-10 pg/mL") +
guides(col = "none")
dat.melt <- read.table("Microglia+CSF_samples_aSyn_sCD163.tsv", header = T, sep = "\t", dec = ",") %>%
melt(id.vars = c("diagnosis", "sex")) %>%
mutate(diagnosis = factor(diagnosis, levels = c("CTRL","MSA","IPD"), labels = c("CTRL","MSA","PD")))
dat.melt %>%
filter(variable == "sCD163_ng.mL_CSF") %>%
ggplot(aes(diagnosis, value)) +
geom_boxplot(aes(fill = diagnosis)) +
theme_bw() +
labs(x = "", y = "sCD163 ng/ml", fill = "Diagnosis") +
scale_fill_manual(values = pal.major) +
stat_compare_means(method = "t.test", comparisons = list(c("CTRL","MSA"), c("MSA","PD"), c("CTRL","PD"))) +
guides(fill = "none") +
theme(line = element_blank())
dat.melt %>%
filter(variable == "aSyn_pg.mL") %>%
ggplot(aes(diagnosis, value)) +
geom_boxplot(aes(fill = diagnosis)) +
theme_bw() +
labs(x = "", y = "aSyn pg/ml", fill = "Diagnosis") +
scale_fill_manual(values = pal.major) +
stat_compare_means(method = "t.test", comparisons = list(c("CTRL","MSA"), c("MSA","PD"), c("CTRL","PD"))) +
guides(fill = "none") +
theme(line = element_blank())
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_signif()`).
dat.melt %>%
filter(variable %in% c("aSyn_pg.mL","sCD163_ng.mL_CSF")) %>%
select(-sex) %>%
mutate(id = rep(seq(63), 2)) %>%
tidyr::pivot_wider(names_from = variable, values_from = value) %>%
ggplot(aes(aSyn_pg.mL, sCD163_ng.mL_CSF)) +
geom_point(aes(col = diagnosis)) +
theme_bw() +
labs(x = "aSyn pg/ml", y = "sCD163, ng/ml", col = "") +
geom_smooth(method = MASS::rlm, se = FALSE, color = "black") +
stat_poly_eq(aes(label = paste(after_stat(rr.label), after_stat(p.value.label), sep = "*\", \"*")), color = "black") +
scale_color_manual(values = pal.major) +
theme(line = element_blank())
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_poly_eq()`).
## Warning: Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).
Here, we use data from Rydbirk et al. on differentially expressed proteins in the soluble fraction of CSF pools.
dat <- read.delim("Soluble_fraction.txt")
msa.dep <- dat %>%
filter(Disease.group == "MSA") %>%
pull(Gene.name)
pd.dep <- dat %>%
filter(Disease.group == "PD") %>%
pull(Gene.name)
overlap <- intersect(msa.dep, pd.dep)
highlight_proteins <- c("CTSS", "FCGR2A", "LAMP1", "CTSD", "CLTC")
msa.dep %<>% .[!. %in% overlap]
pd.dep %<>% .[!. %in% overlap]
overlap %<>% .[!. %in% highlight_proteins]
string_db <- STRINGdb$new(
version = "12.0", # or latest available
species = 9606, # 9606 = Homo sapiens
score_threshold = 400, # confidence score cutoff
input_directory = ""
)
genes <- c(msa.dep, pd.dep, overlap)
mapped_genes <- string_db$map(
data.frame(gene = genes),
"gene",
removeUnmappedRows = TRUE
)
## Warning: we couldn't map to STRING 5% of your identifiers
# Get interactions
interactions <- string_db$get_interactions(mapped_genes$STRING_id)
# Convert to igraph object
ppi_graph <- graph_from_data_frame(interactions, directed = FALSE)
# Create label mapping: STRING ID to gene name
string_to_gene <- mapped_genes %>%
select(STRING_id, gene) %>%
distinct()
# Add labels to graph nodes
V(ppi_graph)$label <- string_to_gene$gene[match(V(ppi_graph)$name, string_to_gene$STRING_id)]
custom_colors <- c(
"CTSS" = "firebrick",
"FCGR2A" = "purple2",
"LAMP1" = "purple2",
"CTSD" = "navyblue",
"CLTC" = "navyblue",
setNames(rep("tomato", length(msa.dep)), msa.dep),
setNames(rep("steelblue", length(pd.dep)), pd.dep),
setNames(rep("orchid", length(overlap)), overlap))
# Apply color mapping to node attributes
V(ppi_graph)$node_color <- custom_colors[V(ppi_graph)$label]
# Get node labels for each edge endpoint
edge_ends <- ends(ppi_graph, es = E(ppi_graph), names = FALSE)
source_labels <- V(ppi_graph)$label[edge_ends[, 1]]
target_labels <- V(ppi_graph)$label[edge_ends[, 2]]
# Assign edge width
E(ppi_graph)$edge_width <- ifelse(
source_labels %in% highlight_proteins & target_labels %in% highlight_proteins,
0.5, 0.1
)
# Compute node sizes based on label
V(ppi_graph)$node_size <- sapply(V(ppi_graph)$label, \(x) if (x %in% overlap) 3 else if (x %in% highlight_proteins) 5 else 2)
V(ppi_graph)$label_type <- ifelse(
V(ppi_graph)$label %in% highlight_proteins,
"highlight", "other"
)
# Plot the network with size and color
ggraph(ppi_graph, layout = "fr") +
geom_edge_link(aes(width = I(edge_width)), alpha = 0.4) +
geom_node_point(aes(color = I(node_color), size = I(node_size)), show.legend = FALSE) +
geom_node_label(data = function(x) { x[x$label_type == "highlight", ] },
aes(label = label), size = 3, label.padding = unit(0.15, "lines"), repel = TRUE, show.legend = F) +
geom_node_text(data = function(x) { x[x$label_type == "other", ] },
aes(label = label), size = 2, repel = TRUE, show.legend = F) +
theme_void()
samples <- anno.major %>%
names() %>%
strsplit("!!") %>%
sget(1) %>%
unique()
crm <- qread("crm.qs",
nthreads = 10)
mtp <- crm$selectMetrics(ids = c(1:4,6,19))
mtp %>%
lapply(\(met) crm$plotSummaryMetrics(metrics = met, comp.group = "sample", second.comp.group = "group") +
scale_fill_manual(values = pal.major) +
labs(x = "") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())) %>%
plot_grid(plotlist = ., labels = letters[1:6], ncol = 2)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
crm$plotSummaryMetrics(metrics = crm$selectMetrics(ids = 20), comp.group = "sample", second.comp.group = "group") +
scale_fill_manual(values = pal.major) +
labs(x = "") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "right")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
crm$plotFilteredCells(doublet.method = "doubletdetection", depth.cutoff = 5e2, size = 0.2, alpha = 0.1) +
dotSize(3)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
crm$plotFilteredCells(type = "bar", doublet.method = "doubletdetection", depth.cutoff = 5e2)$data %>%
filter(sample != "MSA_1406") %>%
mutate(sample = strsplit(sample, "_") %>%
sget(1)) %$%
split(., sample) %>%
lapply(\(x) split(x, x$filter)) %>%
lapply(lapply, \(df) mutate(df, sample = paste0(sample, seq(nrow(df))))) %>%
lapply(bind_rows) %>%
bind_rows() %>%
mutate(sample = factor(sample, levels = c(paste0("CTRL", seq(10)), paste0("MSA", seq(7)), paste0("PD", seq(12))))) %>%
ggplot(aes(sample, pct, fill = filter)) +
geom_bar(stat = "identity") +
geom_text_repel(aes(label = sprintf("%0.2f", round(pct, digits = 2))),
position = position_stack(vjust = 0.5),
direction = "y",
size = 2.5) +
crm$theme +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "", y = "Percentage cells filtered") +
theme(legend.position = "bottom")
These figures cannot be reproduced here due to GDPR. Leaving the code for visibility.
crm <- qread("crm.qs", nthreads = 10)
crm$plotCbCells(samples = crm$metadata$sample %>% as.character() %>% .[. != "MSA_1406"]) +
scale_x_discrete(labels = c(paste("CTRL", seq(10)), paste("MSA", seq(7)), paste("PD", seq(12)))) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
crm$plotCbAmbGenes() +
theme(line = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
axis.ticks.x = element_line())
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 18084106 965.8 50841538 2715.3 50841538 2715.3
## Vcells 1042913724 7956.9 2785612202 21252.6 8457158301 64523.0
c("RBFOX3","MOG","VCAN","AQP4","CSF1R","PDGFRB","PTPRC","MRC1","GAD1","SLC17A7","PPP1R1B") %>%
sort() %>%
lapply(function(g) con$plotGraph(embedding = "UMAP", gene=g, title=g, size=0.1, plot.na = F, palette = colorRampPalette(c("grey90","firebrick"))) + theme(line = element_blank())) %>%
cowplot::plot_grid(plotlist=., ncol=2)
con$embedding <- con$embeddings$UMAP
sample.groups <- con$samples %>%
names() %>%
setNames(grepl.replace(., c("CTRL","MSA","PD")), .)
cao <- Cacoa$new(data.object = con,
sample.groups = ifelse(grepl("CTRL", sample.groups), "CTRL", "DISEASE") %>%
setNames(names(sample.groups)),
cell.groups = anno.major,
ref.level = "CTRL",
target.level = "DISEASE",
n.cores = 32)
meta <- read.delim("metadata.tsv", sep = "\t") %>%
filter(sample %in% names(con$samples)) %>%
tibble::column_to_rownames("sample") %>%
dplyr::select(-condition)
meta %<>%
mutate(sample = rownames(.))
spc <- con$getDatasetPerCell()
mpc <- spc %>%
data.frame(sample = ., cid = names(.))
for (cc in colnames(meta)) {
mpc[[cc]] <- meta[[cc]][match(mpc$sample, meta$sample)]
}
mpc$subtype[mpc$subtype == ""] <- NA
p <- con$plotGraph(colors = setNames(mpc$age, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, title = "Age", size = 0.3, alpha = 0.3) +
scale_color_gradient(low = "grey", high = "firebrick") +
theme(line = element_blank()) +
labs(col = "Years")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p
p <- con$plotGraph(colors = setNames(mpc$pmi, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, title = "PMI", size = 0.3, alpha = 0.3) +
scale_color_gradient(low = "grey", high = "firebrick") +
theme(line = element_blank()) +
labs(col = "Hours")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p
p <- con$plotGraph(colors = setNames(mpc$disease_duration, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, title = "Disease duration", size = 0.3, alpha = 0.3) +
scale_color_gradient(low = "grey", high = "firebrick") +
theme(line = element_blank()) +
labs(col = "Years")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p
p <- con$plotGraph(groups = setNames(mpc$sample, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Sample", size = 0.3, alpha = 0.3) +
theme(line = element_blank()) + dotSize(3)
p
p <- con$plotGraph(groups = setNames(mpc$brain_bank, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Brain bank", size = 0.3, alpha = 0.3) +
theme(line = element_blank()) + dotSize(3)
p
p <- con$plotGraph(groups = setNames(mpc$sex, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Sex", size = 0.3, alpha = 0.3) +
theme(line = element_blank()) + dotSize(3)
p
p <- con$plotGraph(groups = setNames(mpc$subtype, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Subtype", size = 0.3, alpha = 0.3) +
theme(line = element_blank()) + dotSize(3)
p
p <- con$plotGraph(groups = setNames(mpc$extract, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Extract", size = 0.3, alpha = 0.3) +
theme(line = element_blank()) + dotSize(3)
p
p <- con$plotGraph(groups = setNames(mpc$flowcell, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Flowcell", size = 0.3, alpha = 0.3) +
theme(line = element_blank()) + dotSize(3)
p
con_pd <- con$clone()
con_pd$embedding <- con$embeddings$UMAP
con_pd$samples <- con$samples %>% .[!grepl("MSA", names(.))]
sample.groups <- con_pd$samples %>%
names() %>%
setNames(grepl.replace(., c("CTRL","PD")), .)
cao_pd <- Cacoa$new(data.object = con_pd,
sample.groups = ifelse(grepl("CTRL", sample.groups), "CTRL", "PD") %>%
setNames(names(sample.groups)),
cell.groups = anno.major,
ref.level = "CTRL",
target.level = "PD",
n.cores = 32)
cao_pd$estimateExpressionShiftMagnitudes()
## Filtering data...
## Excluding cell types Exc. neurons that don't have enough samples
## done!
## Calculating pairwise distances using dist='cor'...
## Done!
cao_pd$estimateMetadataSeparation(sample.meta = meta %>% filter(!grepl("MSA", sample)) %>% dplyr::select(-disease_duration, -subtype, -sample),
space = "expression.shifts")
cowplot::plot_grid(plotlist = list(
cao_pd$plotSampleDistances(space = "expression.shifts", show.sample.size = T, method = "UMAP", title = "Condition"),
cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$age %>% setNames(rownames(meta)), title = "Age"),
cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$brain_bank %>% setNames(rownames(meta)), show.sample.size = T, title = "Brain bank"),
cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$extract %>% setNames(rownames(meta)), show.sample.size = T, title = "Extract batch"),
cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$flowcell %>% setNames(rownames(meta)), show.sample.size = T, title = "Flowcell"),
cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$pmi %>% setNames(rownames(meta)), title = "PMI"),
cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$sex %>% setNames(rownames(meta)), show.sample.size = T, title = "Sex"),
cao_pd$test.results$metadata.separation$pvalues %>%
data.frame(p = .) %>%
mutate(pp = -log10(p)) %>%
tibble::rownames_to_column("covariate") %>%
ggplot(aes(covariate, pp)) +
geom_bar(stat = "identity", fill = "lightblue4") +
ylim(0, 2) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(x = "", y = "-log10(adj. p)", title = "Association significance") +
geom_hline(yintercept = -log10(0.05), colour = "red3")
), ncol = 4) -> p
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
p
con_msa <- con$clone()
con_msa$embedding <- con$embeddings$UMAP
con_msa$samples <- con$samples %>% .[!grepl("PD", names(.))]
sample.groups <- con_msa$samples %>%
names() %>%
setNames(grepl.replace(., c("CTRL","MSA")), .)
cao_msa <- Cacoa$new(data.object = con_msa,
sample.groups = ifelse(grepl("CTRL", sample.groups), "CTRL", "MSA") %>%
setNames(names(sample.groups)),
cell.groups = anno.major,
ref.level = "CTRL",
target.level = "MSA",
n.cores = 32)
cao_msa$estimateExpressionShiftMagnitudes()
## Filtering data...
## Excluding cell types Exc. neurons that don't have enough samples
## done!
## Calculating pairwise distances using dist='cor'...
## Done!
cao_msa$estimateMetadataSeparation(sample.meta = meta %>% filter(!grepl("PD", sample)) %>% dplyr::select(-brain_bank, -sample),
space = "expression.shifts")
cowplot::plot_grid(plotlist = list(
cao_msa$plotSampleDistances(space = "expression.shifts", show.sample.size = T, method = "UMAP", title = "Condition"),
cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$age %>% setNames(rownames(meta)), title = "Age"),
cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$disease_duration %>% setNames(rownames(meta)), show.sample.size = T, title = "Disease duration"),
cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$extract %>% setNames(rownames(meta)), show.sample.size = T, title = "Extract batch"),
cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$flowcell %>% setNames(rownames(meta)), show.sample.size = T, title = "Flowcell"),
cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$pmi %>% setNames(rownames(meta)), title = "PMI"),
cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$sex %>% setNames(rownames(meta)), show.sample.size = T, title = "Sex"),
cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$subtype %>% setNames(rownames(meta)), show.sample.size = T, title = "Subtype"),
cao_msa$test.results$metadata.separation$pvalues %>%
data.frame(p = .) %>%
mutate(pp = -log10(p)) %>%
tibble::rownames_to_column("covariate") %>%
ggplot(aes(covariate, pp)) +
geom_bar(stat = "identity", fill = "lightblue4") +
ylim(0, 2) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(x = "", y = "-log10(adj. p)", title = "Association significance") +
geom_hline(yintercept = -log10(0.05), colour = "red3")
), ncol = 5) -> p
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
p
con_dis <- con$clone()
con_dis$embedding <- con$embeddings$UMAP
con_dis$samples <- con$samples %>% .[!grepl("CTRL", names(.))]
sample.groups <- con_dis$samples %>%
names() %>%
setNames(grepl.replace(., c("PD","MSA")), .)
cao_dis <- Cacoa$new(data.object = con_dis,
sample.groups = ifelse(grepl("PD", sample.groups), "PD", "MSA") %>%
setNames(names(sample.groups)),
cell.groups = anno.major,
ref.level = "PD",
target.level = "MSA",
n.cores = 32)
cao_dis$estimateExpressionShiftMagnitudes()
## Filtering data...
## Excluding cell types Exc. neurons that don't have enough samples
## done!
## Calculating pairwise distances using dist='cor'...
## Done!
cao_dis$estimateMetadataSeparation(sample.meta = meta %>% filter(!grepl("CTRL", sample)) %>% dplyr::select(-sample),
space = "expression.shifts")
cowplot::plot_grid(plotlist = list(
cao_dis$plotSampleDistances(space = "expression.shifts", show.sample.size = T, method = "UMAP", title = "Condition"),
cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$age %>% setNames(rownames(meta)), title = "Age"),
cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$brain_bank %>% setNames(rownames(meta)), show.sample.size = T, title = "Brain bank"),
cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$disease_duration %>% setNames(rownames(meta)), show.sample.size = T, title = "Disease duration"),
cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$extract %>% setNames(rownames(meta)), show.sample.size = T, title = "Extract batch"),
cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$flowcell %>% setNames(rownames(meta)), show.sample.size = T, title = "Flowcell"),
cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$pmi %>% setNames(rownames(meta)), title = "PMI"),
cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$sex %>% setNames(rownames(meta)), show.sample.size = T, title = "Sex"),
cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$subtype %>% setNames(rownames(meta)), show.sample.size = T, title = "Subtype"),
cao_dis$test.results$metadata.separation$pvalues %>%
data.frame(p = .) %>%
mutate(pp = -log10(p)) %>%
tibble::rownames_to_column("covariate") %>%
ggplot(aes(covariate, pp)) +
geom_bar(stat = "identity", fill = "lightblue4") +
ylim(0, 2) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(x = "", y = "-log10(adj. p)", title = "Association significance") +
geom_hline(yintercept = -log10(0.05), colour = "red3")
), ncol = 5) -> p
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
p
To investigate effects from co-variates, we ran scITD on all cell types for each comparison. We don’t determine factors here as it takes a significant amount of time, but we keep the code in the first instance.
We tested different levels of var_scale_power, here we’re just showing var_scale_power = 0.5 as we didn’t find any effects except for OPCs. Per default, we set vargenes_method=“norm_var_pvals” and adjusted vargenes_thresh to obtain around 1,000 var. genes, but in some instances we took the top most variable genes instead.
con.tmp <- qread("cao_neurons.qs", nthreads = 10)$data.object
anno <- qread("anno_neurons.qs")
anno %<>%
collapseAnnotation("GABA") %>%
collapseAnnotation("GLU") %>%
renameAnnotation("GABA", "Inh. neurons") %>%
renameAnnotation("GLU", "Exc. neurons") %>%
collapseAnnotation("MSN")
## Collapsing 11 labels containing 'GABA' in their name into one label.
## Collapsing 2 labels containing 'GLU' in their name into one label.
## Collapsing 3 labels containing 'MSN' in their name into one label.
cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>%
Matrix::t() %>%
.[!grepl("MT-|RPS|RPL", rownames(.)), ]
meta.all <- con.tmp$getDatasetPerCell() %>%
data.frame(donors = .) %>%
mutate(ctypes = anno[rownames(.)] %>% unname()) %>%
.[complete.cases(.), ]
metadata.all <- read.delim("metadata.tsv")
for (cc in colnames(metadata.all)[-1]) {
meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}
# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()
meta <- meta.all %>%
filter(condition != "PD") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "PD")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)],
meta_data=meta,
params=param_list,
label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw %>% .[, colnames(.) %in% :
## Cell type names cannot have special characters '.', '_', or ':'. Removing these
## characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=1,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var_pvals',
vargenes_thresh=0.7,
scale_var = TRUE,
var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 10 donors. All donors have at least 1 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1179
# get assistance with rank determination
itd_container %<>% determine_ranks_tucker(max_ranks_test=c(10,15),
shuffle_level='cells',
num_iter=10,
norm_method='trim',
scale_factor=10000,
scale_var=TRUE,
var_scale_power=0.5)
itd_container$plots$rank_determination_plot
itd_container %<>% run_tucker_ica(ranks=c(5,10),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
show_donor_ids = TRUE,
add_meta_associations="pval")
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
meta <- meta.all %>%
filter(condition != "MSA") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "MSA")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw,
meta_data=meta,
params=param_list,
label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=1,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var',
vargenes_thresh=500,
scale_var = TRUE,
var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 9 donors. All donors have at least 1 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1084
itd_container %<>% run_tucker_ica(ranks=c(5,10),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
meta <- meta.all %>%
filter(condition != "CTRL") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "CTRL")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw,
meta_data=meta,
params=param_list,
label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=1,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var',
vargenes_thresh=500,
scale_var = TRUE,
var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 7 donors. All donors have at least 1 cells in each cell type included.
## Consider using fewer cell types or reducing the donor_min_cells parameter to include more donors.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1084
itd_container %<>% run_tucker_ica(ranks=c(5,10),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
con.tmp <- qread("con_astrocytes.qs", nthreads = 10)
anno <- qread("anno_astro.qs")
cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>%
.[, !grepl("MT-|RPS|RPL", colnames(.))] %>%
Matrix::t()
meta.all <- con.tmp$getDatasetPerCell() %>%
.[names(.) %in% names(anno)] %>%
data.frame(donors = .) %>%
mutate(ctypes = anno[rownames(.)] %>% unname()) %>%
.[complete.cases(.), ]
metadata.all <- read.delim("metadata.tsv")
for (cc in colnames(metadata.all)[-1]) {
meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}
# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()
meta <- meta.all %>%
filter(condition != "PD") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "PD")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)],
meta_data=meta,
params=param_list,
label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw %>% .[, colnames(.) %in% :
## Cell type names cannot have special characters '.', '_', or ':'. Removing these
## characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var_pvals',
vargenes_thresh=0.05,
scale_var = TRUE,
var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 17 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 704
itd_container %<>% run_tucker_ica(ranks=c(5,8),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
meta <- meta.all %>%
filter(condition != "MSA") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "MSA")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw,
meta_data=meta,
params=param_list,
label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var_pvals',
vargenes_thresh=.05,
scale_var = TRUE,
var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 22 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 871
itd_container %<>% run_tucker_ica(ranks=c(5,9),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), # Omitting brain bank
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), # Omitting brain bank
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
meta <- meta.all %>%
filter(condition != "CTRL") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "CTRL")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw,
meta_data=meta,
params=param_list,
label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var_pvals',
vargenes_thresh=.05,
scale_var = TRUE,
var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 19 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 701
itd_container %<>% run_tucker_ica(ranks=c(6,7),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
con.tmp <- qread("cao_micro_pvm.qs", nthreads = 10)$data.object
anno <- qread("anno_micro_pvm.qs")
cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>%
Matrix::t() %>%
.[!grepl("MT-|RPS|RPL", rownames(.)), ]
meta.all <- con.tmp$getDatasetPerCell() %>%
data.frame(donors = .) %>%
mutate(ctypes = anno[rownames(.)] %>% unname()) %>%
.[complete.cases(.), ]
metadata.all <- read.delim("metadata.tsv")
for (cc in colnames(metadata.all)[-1]) {
meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}
# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()
meta <- meta.all %>%
filter(condition != "PD") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "PD")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)],
meta_data=meta,
params=param_list,
label_donor_sex = F)
itd_container %<>% form_tensor(donor_min_cells=3,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var',
vargenes_thresh=500,
scale_var = TRUE,
var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 10 donors. All donors have at least 3 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1114
itd_container %<>% run_tucker_ica(ranks=c(3,7),
tucker_type = 'regular',
rotation_type = 'hybrid')
factors <- c('sex','pmi', "age", "flowcell", "extract")
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test = factors,
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars= factors,
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
meta <- meta.all %>%
filter(condition != "MSA") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "MSA")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw,
meta_data=meta,
params=param_list,
label_donor_sex = F)
itd_container %<>% form_tensor(donor_min_cells=3,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var',
vargenes_thresh=500,
scale_var = TRUE,
var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 18 donors. All donors have at least 3 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1012
itd_container %<>% run_tucker_ica(ranks=c(4,10),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
meta <- meta.all %>%
filter(condition != "CTRL") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "CTRL")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw,
meta_data=meta,
params=param_list,
label_donor_sex = F)
itd_container %<>% form_tensor(donor_min_cells=3,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var',
vargenes_thresh=500,
scale_var = TRUE,
var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 14 donors. All donors have at least 3 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1001
itd_container %<>% run_tucker_ica(ranks=c(4,10),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
con.tmp <- qread("con_oligodendrocytes.qs", nthreads = 10)
anno <- qread("anno_oligo.qs")
cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>%
.[rownames(.) %in% names(anno), !grepl("MT-|RPS|RPL", colnames(.))] %>%
Matrix::t()
meta.all <- con.tmp$getDatasetPerCell() %>%
data.frame(donors = .) %>%
mutate(ctypes = anno[rownames(.)] %>% unname()) %>%
.[complete.cases(.), ]
metadata.all <- read.delim("metadata.tsv")
for (cc in colnames(metadata.all)[-1]) {
meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}
# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()
meta <- meta.all %>%
filter(condition != "PD") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "PD")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)],
meta_data=meta,
params=param_list,
label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw %>% .[, colnames(.) %in% :
## Cell type names cannot have special characters '.', '_', or ':'. Removing these
## characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var_pvals',
vargenes_thresh=0.05,
scale_var = TRUE,
var_scale_power = 1) # Diminishable effect at 0.5, but not on any other level
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 17 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1209
itd_container %<>% run_tucker_ica(ranks=c(6,9),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
meta <- meta.all %>%
filter(condition != "MSA") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "MSA")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw,
meta_data=meta,
params=param_list,
label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var_pvals',
vargenes_thresh=.05,
scale_var = TRUE,
var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 21 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1505
itd_container %<>% run_tucker_ica(ranks=c(4,8),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
meta <- meta.all %>%
filter(condition != "CTRL") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "CTRL")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw,
meta_data=meta,
params=param_list,
label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var_pvals',
vargenes_thresh=.05,
scale_var = TRUE,
var_scale_power = 1) # Diminishable effect at 0.5, but not on any other level
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 18 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1146
itd_container %<>% run_tucker_ica(ranks=c(5,6),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
con.tmp <- qread("con_opcs.qs", nthreads = 10)
anno <- qread("anno_opc.qs")
cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>%
Matrix::t() %>%
.[!grepl("MT-|RPS|RPL", rownames(.)), ]
meta.all <- con.tmp$getDatasetPerCell() %>%
data.frame(donors = .) %>%
mutate(ctypes = anno[rownames(.)] %>% unname()) %>%
.[complete.cases(.), ]
metadata.all <- read.delim("metadata.tsv")
for (cc in colnames(metadata.all)[-1]) {
meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}
# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()
meta <- meta.all %>%
filter(condition != "PD") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "PD")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)],
meta_data=meta,
params=param_list,
label_donor_sex = F)
itd_container %<>% form_tensor(donor_min_cells=5,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var_pvals',
vargenes_thresh=0.2,
scale_var = TRUE,
var_scale_power = 0.5) # Diminishable effect at 2, but not on any other level
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 17 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 778
itd_container %<>% run_tucker_ica(ranks=c(4,5),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
meta <- meta.all %>%
filter(condition != "MSA") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "MSA")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw,
meta_data=meta,
params=param_list,
label_donor_sex = F)
itd_container %<>% form_tensor(donor_min_cells=5,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var_pvals',
vargenes_thresh=.4,
scale_var = TRUE,
var_scale_power = 1) # No effect at 0.5, but 1, 1.5, 2
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 21 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 833
itd_container %<>% run_tucker_ica(ranks=c(4,6),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
meta <- meta.all %>%
filter(condition != "CTRL") %>%
mutate(donors = factor(donors),
ctypes = factor(ctypes))
metadata <- metadata.all %>%
filter(condition != "CTRL")
cm.raw <- cm.raw.all %>%
.[, colnames(.) %in% rownames(meta)]
# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
ncores = 31,
rand_seed = 10)
# create project container
itd_container <- make_new_container(count_data=cm.raw,
meta_data=meta,
params=param_list,
label_donor_sex = F)
itd_container %<>% form_tensor(donor_min_cells=5,
norm_method='trim',
scale_factor=10000,
vargenes_method='norm_var_pvals',
vargenes_thresh=.4,
scale_var = TRUE,
var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 18 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 617
itd_container %<>% run_tucker_ica(ranks=c(5,6),
tucker_type = 'regular',
rotation_type = 'hybrid')
# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
stat_use='pval')
# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
show_donor_ids = TRUE,
add_meta_associations='pval')
# show the donor scores heatmap
p <- itd_container$plots$donor_matrix
p
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 18310488 977.9 50841538 2715.3 50841538 2715.3
## Vcells 1087513380 8297.1 3851063268 29381.3 8457158301 64523.0
We’re calculating cosine similarities to annotated data from Garma et al.. The data are available here.
Here, we’re showing how we prepared the data. This will not be run again.
lp.raw <- loomR::connect("Interneurons_raw.loom", mode = "r+", skip.validate = T)
cm.raw <- lp.raw[["matrix"]][,] %>%
`dimnames<-`(list(
lp.raw[["col_attrs/obs_names"]][],
lp.raw[["row_attrs/var_names"]][]
))
lp.raw$close()
lp.norm <- loomR::connect("Interneurons_processed.loom", mode = "r+", skip.validate = T)
anno <- setNames(
lp.norm$col.attrs$`IN subclass`[],
lp.norm$col.attrs$obs_names[]
) %>%
factor()
spc <- setNames(
lp.norm$col.attrs$sample[],
lp.norm$col.attrs$obs_names[]
) %>%
factor()
origin <- setNames(
lp.norm$col.attrs$Origin[],
lp.norm$col.attrs$obs_names[]
) %>%
factor()
area <- setNames(
lp.norm$col.attrs$Region[],
lp.norm$col.attrs$obs_names[]
) %>%
factor()
sex <- setNames(
lp.norm$col.attrs$Sex[],
lp.norm$col.attrs$obs_names[]
) %>%
factor()
umap <- `rownames<-`(lp.norm$col.attrs$X_umap[,] %>% t(),
lp.norm$col.attrs$obs_names[])
lp.norm$close()
Then, we need to prepare a Conos object of those data. This will not be run here.
preprocessed <- spc %>%
split(names(.), .) %>%
.[sapply(., length) > 50] %>%
lapply(\(cid) cm.raw[cid, ]) %>%
lapply(Matrix::t) %>%
lapply(basicP2proc, get.largevis = F, get.tsne = F, make.geneknn = F, n.cores = 20)
con <- Conos$new(preprocessed, n.cores = 32)
con$embeddings$PCA$UMAP <- umap
con$embedding <- umap
con$clusters$leiden$groups <- anno
con$clusters$sex$groups <- sex
con$clusters$area$groups <- area
con$clusters$origin$groups <- origin
con$clusters$spc$groups <- spc
qsave(con, "con_garma.qs", nthreads = 10)
# Our data
rydbirk.neurons.anno <- qread("anno_neurons.qs") %>%
factor(labels = paste("Rydbirk", levels(.), sep = "-"))
rydbirk.neurons <- qread("cao_neurons.qs", nthreads = 10)$data.object
rydbirk.neurons.pseudo <- rydbirk.neurons$getJointCountMatrix(raw = T) %>%
sccore::collapseCellsByType(groups = rydbirk.neurons.anno, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
rydbirk.neurons.pseudo %<>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("-") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(rydbirk.neurons.pseudo), "group")),
design = ~ 1) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
# Garma data
con.garma <- qread("con_garma.qs", nthreads = 10)
garma.neurons.anno <- con.garma$clusters$anno$groups %>%
factor() %>%
factor(labels = paste("Garma", levels(.), sep = "-"))
garma.neurons.pseudo <- con.garma$getJointCountMatrix(raw = T) %>%
sccore::collapseCellsByType(groups = garma.neurons.anno, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
garma.neurons.pseudo %<>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(garma.neurons.pseudo), "group")),
design = ~ 1) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
# We rotate Garma into Rydbirk sample PCA space
cm.rydbirk <- rydbirk.neurons.pseudo %>%
as.data.frame() %>%
filter(rowSums(.) > 0)
cm.garma <- garma.neurons.pseudo %>%
as.data.frame() %>%
filter(rowSums(.) > 0)
genesToKeep <- conos:::getOdGenesUniformly(append(con.garma$samples, rydbirk.neurons$samples), 50) %>%
intersect(rownames(cm.rydbirk)) %>%
intersect(rownames(cm.garma))
pc.res <-
cm.rydbirk[genesToKeep, ] %>%
t() %>%
prcomp(center = T,
scale = T)
pc.tmp <- cm.garma[genesToKeep, ] %>%
as.data.frame() %>%
filter(rowSums(.) > 0) %>%
t() %>%
scale(pc.res$center, pc.res$scale) %*% pc.res$rotation
dat.plot <- rbind(pc.res$x, pc.tmp) %>%
data.frame() %>%
mutate(id = rownames(.)) %>%
mutate(study = ifelse(grepl("Rydbirk", id), "Rydbirk", "Garma") %>% factor(),
anno = strsplit(id, "-") %>% sget(2))
lsa::cosine(dat.plot %>%
select(-id, -study, -anno) %>%
t()) %>%
reshape2::melt() %>%
ggplot(aes(Var1, Var2, fill = value)) +
geom_tile() +
scale_fill_gradient2(low = "skyblue3", mid = "white", high = "deeppink3") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(title = "Top 50 OD genes, Garma PCA rotation into Rydbirk PCA space")
cm.garma.raw <- con.garma$getJointCountMatrix(raw = T)
cm.rydbirk.raw <- rydbirk.neurons$getJointCountMatrix(raw = T)
idx <- intersect(colnames(cm.garma.raw), colnames(cm.rydbirk.raw))
cm.garma.ext <- cm.garma.raw %>%
sccore:::extendMatrix(idx)
cm.rydbirk.ext <- cm.rydbirk.raw %>%
sccore:::extendMatrix(idx)
cm.comb <- rbind(cm.garma.ext, cm.rydbirk.ext)
anno.comb <- c(garma.neurons.anno %>% .[names(.) %in% rownames(cm.comb)], rydbirk.neurons.anno %>% .[names(.) %in% rownames(cm.comb)]) %>%
factor()
cm.comb %<>% .[rownames(.) %in% names(anno.comb), ]
unique(
c("GAD1","GAD2","PPP1R1B","SLC17A7","CHAT","CALB2","DCC","ID2","MEIS2","ST18","NKX2-1","NR2F2","PAX6","PVALB","RXFP1","SST","VIP","RELN","SATB2","DRD1","DRD2","FOXP2", "LRP8", "VLDLR") %>%
c("CCK", "VIP", "CXCL14", "CHAT", "PTHLH", "MOXD1", "CHST9", "TAC3", "SST", "NPY", "PVALB", "GRIK3", "DACH1", "GAD1", "GAD2")
) %>%
sccore::dotPlot(cm.comb, anno.comb) + scale_color_gradient(low = "grey80", high = "firebrick")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 19630122 1048.4 50841538 2715.3 50841538 2715.3
## Vcells 1534868907 11710.2 3851063268 29381.3 8457158301 64523.0
We’re calculating cosine similarities to annotated data from Kamath et al.. The data are available here.
cm <- Matrix::readMM("GSE178265_Homo_matrix.mtx") %>%
`dimnames<-`(list(
read.delim("GSE178265_Homo_features.tsv", header = F)$V1,
read.delim("GSE178265_Homo_bcd.tsv", header = F)$V1
))
metadata.raw <- read.delim("METADATA_PD.tsv", header = T)[-1, ]
cells.to.omit <- metadata.raw %>%
filter(donor_id %in% c("NIH1", "Rat1", "TreeShrew1")) %>%
pull(NAME)
meta.per.cell <- metadata.raw %>%
filter(!NAME %in% cells.to.omit)
meta.per.sample <- metadata.raw %>%
filter(!NAME %in% cells.to.omit) %>%
select(-NAME, -libname, -biosample_id) %>%
mutate(area = grepl.replace(organ__ontology_label, c("substantia nigra pars compacta", "caudate nucleus"), c("SN", "CN"))) %>%
mutate(donor_area = paste(donor_id, area, sep = "-")) %>%
.[match(unique(.$donor_area), .$donor_area), ]
cids.per.donor.area <- meta.per.cell %>%
mutate(area = grepl.replace(organ__ontology_label, c("substantia nigra pars compacta", "caudate nucleus"), c("SN", "CN"))) %>%
mutate(donor_area = paste(donor_id, area, sep = "-")) %$%
split(NAME, donor_area)
cm.list <- meta %>%
sccore::plapply(\(mm) cm[, colnames(cm) %in% rownames(mm)], n.cores = 7)
cm.area.donor <- cm.list %>%
lapply(\(area) sccore::plapply(cids.per.donor.area, \(cid) area[, colnames(area) %in% cid], n.cores = 22)) %>%
lapply(\(cms) cms[sapply(cms, ncol) > 30]) %>%
lapply(lapply, as, "CsparseMatrix") %>%
lapply(lapply, basicP2proc, get.largevis = F, get.tsne = F, make.geneknn = F, n.cores = 20)
# Annotations
embeddings <- dir(pattern = "UMAP")
embeddings %<>%
lapply(\(type) read.delim(type, header = T, row.names = 1)) %>%
setNames(embeddings %>% strsplit("_") %>% sget(1)) %>%
lapply(dplyr::rename, subtype = Cell_Type)
con.list <- names(embeddings) %>%
lapply(\(nn) {
con <- Conos$new(cm.area.donor[[nn]], n.cores = 32)
con$embeddings <- list(UMAP = embeddings[[nn]][, -3])
con$embedding <- con$embeddings[[1]]
con$clusters <- list(leiden = list(groups = embeddings[[nn]] %>% {setNames(pull(., subtype), rownames(.))}))
return(con)
}) %>%
setNames(names(embeddings))
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 22104408 1180.6 50841538 2715.3 50841538 2715.3
## Vcells 6161919585 47011.8 8108351500 61861.9 8457158301 64523.0
# Our data
rydbirk.neurons.anno <- qread("anno_neurons.qs") %>%
factor(labels = paste("Rydbirk", levels(.), sep = "-"))
rydbirk.neurons <- qread("cao_neurons.qs", nthreads = 10)$data.object
rydbirk.neurons.pseudo <- rydbirk.neurons$getJointCountMatrix(raw = T) %>%
sccore::collapseCellsByType(groups = rydbirk.neurons.anno, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
rydbirk.neurons.pseudo %<>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("-") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(rydbirk.neurons.pseudo), "group")),
design = ~ 1) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
# Kamath data
kamath.neurons.anno1 <- con.list$da$clusters$leiden$groups %>%
factor() %>%
factor(labels = paste("Kamath", levels(.), sep = "-"))
kamath.neurons.anno2 <- con.list$nonda$clusters$leiden$groups %>%
factor() %>%
factor(labels = paste("Kamath", levels(.), sep = "-"))
kamath.neurons.anno <- factor(c(kamath.neurons.anno1, kamath.neurons.anno2))
kamath.cm1 <- con.list$da$getJointCountMatrix(raw = T)
kamath.cm2 <- con.list$nonda$getJointCountMatrix(raw = T)
kamath.neurons.pseudo <- rbind(kamath.cm1, kamath.cm2) %>%
sccore::collapseCellsByType(groups = kamath.neurons.anno, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
kamath.neurons.pseudo %<>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(kamath.neurons.pseudo), "group")),
design = ~ 1) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
# We rotate Kamath into Rydbirk sample PCA space
cm.rydbirk <- rydbirk.neurons.pseudo %>%
as.data.frame() %>%
filter(rowSums(.) > 0)
cm.kamath <- kamath.neurons.pseudo %>%
as.data.frame() %>%
filter(rowSums(.) > 0)
genesToKeep <- conos:::getOdGenesUniformly(append(con.list$da$samples, rydbirk.neurons$samples) %>% append(con.list$nonda$samples), 100) %>%
intersect(rownames(cm.rydbirk)) %>%
intersect(rownames(cm.kamath))
pc.res <-
cm.rydbirk[genesToKeep, ] %>%
t() %>%
prcomp(center = T,
scale = T)
pc.tmp <- cm.kamath[genesToKeep, ] %>%
as.data.frame() %>%
filter(rowSums(.) > 0) %>%
t() %>%
scale(pc.res$center, pc.res$scale) %*% pc.res$rotation
# Plot
dat.plot <- rbind(pc.res$x, pc.tmp) %>%
data.frame() %>%
mutate(id = rownames(.)) %>%
mutate(study = ifelse(grepl("Rydbirk", id), "Rydbirk", "Kamath") %>% factor(),
anno = strsplit(id, "-") %>% sget(2))
lsa::cosine(dat.plot %>%
select(-id, -study, -anno) %>%
t()) %>%
reshape2::melt() %>%
ggplot(aes(Var1, Var2, fill = value)) +
geom_tile() +
scale_fill_gradient2(low = "skyblue3", mid = "white", high = "deeppink3") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(title = "Top 100 OD genes, Kamath PCA rotation into Rydbirk PCA space")
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 20779329 1109.8 50841538 2715.3 50841538 2715.3
## Vcells 5971632546 45560.0 11676202160 89082.4 10605578349 80914.2
Here, we use public data from Smajic et al.. We isolated neurons and used the available annotation (IPDCO_hg_midbrain_cell.tsv).
con.smajic <- qread("smajic_neurons.qs", nthreads = 10)
con.neurons <- qread("cao_neurons.qs", nthreads = 10)$data.object
# Get Smajic annotation
tmp <- data.frame(cid = rownames(con.smajic$embedding)) %>%
mutate(barcode = strsplit(cid, "_") %>%
sget(3))
anno <- data.table::fread("IPDCO_hg_midbrain_cell.tsv", header = T) %>%
mutate(barcode = strsplit(barcode, "_") %>%
sget(1)) %>%
mutate(cid = tmp$cid[match(barcode, tmp$barcode)]) %>%
filter(!is.na(cid)) %>%
pull(cell_ontology, cid) %>%
.[!. %in% c("Astrocytes", "Endothelial cells", "Ependymal", "Microglia", "Oligodendrocytes", "OPCs", "Pericytes")] %>%
factor()
cowplot::plot_grid(plotlist = list(
con.smajic$plotGraph(groups = anno, title = "Smajic neurons, annotation", font.size = 3, shuffle.colors = T, plot.na = F) + theme(line = element_blank()),
con.neurons$plotGraph(groups = anno.neurons, title = "Rydbirk neurons, annotation", font.size = 3) + theme(line = element_blank()),
con.smajic$plotGraph(groups = anno, gene = "RELN", title = "RELN expression", plot.na = F) + theme(line = element_blank()),
con.neurons$plotGraph(gene = "RELN", title = "RELN expression", plot.na = F) + theme(line = element_blank()),
con.smajic$plotGraph(groups = anno, gene = "CADPS2", title = "CADPS2 expression", plot.na = F) + theme(line = element_blank()),
con.neurons$plotGraph(gene = "CADPS2", title = "CADPS2 expression", plot.na = F) + theme(line = element_blank())),
nrow = 3)
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 20820390 1112.0 50841538 2715.3 50841538 2715.3
## Vcells 5972464059 45566.3 11676202160 89082.4 10605578349 80914.2
These plots were created with results from LIANA in Python.
cm.merged <- con$getJointCountMatrix(raw = T) %>%
Matrix::t() %>%
.[, colnames(.) %in% names(anno.major)]
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.major %>%
.[!is.na(.)] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(),
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo %<>%
{. + 1} %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
c("TSPO", "COQ2", "MAPT", "FBXO47", "ELOVL7", "EDN1", "GAB1", "TENM2", "RABGEF1", "PLA2G4C", "INPP4B", "ZIC1", "ZIC2", "ZIC3", "ZIC4", "SNCA", "TPPP", "TPPP") %>%
{Map(\(gene, leg) plotGenePseudoBulk(gene, cm.pseudo, leg), gene = ., leg = c(rep(F, 17), T))} %>%
cowplot::plot_grid(plotlist = ., ncol = 3)
## Warning in stat_compare_means(aes(fill = condition), method = "kruskal", : Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
res.old <- c("TSPO", "COQ2", "MAPT", "FBXO47", "ELOVL7", "EDN1", "GAB1", "TENM2", "RABGEF1", "PLA2G4C", "INPP4B", "ZIC1", "ZIC2", "ZIC3", "ZIC4", "SNCA", "TPPP") %>%
lapply(\(gene) {
idx <- cm.pseudo %>%
colnames() %>%
data.frame(id = .) %>%
mutate(condition = strsplit(id, "_|!!") %>% sget(1),
ct = strsplit(id, "!!") %>% sget(2)) %>%
mutate(ord = order(condition, ct))
x <- cm.pseudo %>%
.[match(gene, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
.[idx$ord]
plot.dat <- x %>%
{data.frame(sample = names(.),
value = unname(.))} %>%
mutate(anno = strsplit(sample, "!!") %>%
sget(2),
condition = strsplit(sample, "!!|_") %>%
sget(1)) %>%
mutate(anno = factor(anno))
stat.test <- plot.dat %>%
group_by(anno) %>%
rstatix::wilcox_test(value ~ condition)
stat.omnibus <- plot.dat %>%
group_by(anno) %>%
rstatix::kruskal_test(value ~ condition)
out <- data.frame(Gene = gene, celltype = stat.test$anno, omnibus.old = stat.omnibus$p, contrast = sapply(seq(nrow(stat.test)), \(x) paste(stat.test$group1[x], stat.test$group2[x], sep = " - ")), padj.old = stat.test$p.adj)
return(out)
}) %>%
bind_rows()
cao <- qread("cao_micro_pvm.qs", nthreads = 10)
anno.sort <- anno.micro[rownames(cao$data.object$embedding)]
anno.sel <- anno.sort[!anno.sort %in% c("MIC_intermediate1", "PVMs")] %>% factor()
emb <- cao$data.object$embedding %>%
`colnames<-`(c("UMAP1","UMAP2")) %>%
.[rownames(.) %in% names(anno.sel), ]
sds_obj <- slingshot(emb,
anno.sel,
start.clus = "MIC_steady-state",
stretch = 0
)
sds <- as.SlingshotDataSet(sds_obj)
pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1]
plot.df <- cao$data.object$embedding %>%
as.data.frame() %>%
.[rownames(.) %in% names(sds_obj), ] %>%
`colnames<-`(c("UMAP1","UMAP2")) %>%
mutate(., annotation = anno.micro[rownames(.)] %>% as.factor()) %>%
.[complete.cases(.),]
ldata <- getTscanTrajectory(cao$data.object, anno.sel)
cao$data.object$embedding %>%
as.data.frame() %>%
mutate(., unname(anno.micro[rownames(.)])) %>%
setNames(c("UMAP1", "UMAP2", "annotation")) %>%
mutate(., pseudotime = pseudotime[rownames(.)]) %>%
ggplot() +
geom_point(aes(UMAP1, UMAP2, col = pseudotime), size = 0.3) +
geom_line(data = ldata1, aes(UMAP1, UMAP2, group = edge), size = 1) +
theme_bw() +
theme(line = element_blank()) +
scale_color_gradient(low = "navyblue", high = "orange") +
labs(col = "Pseudotime", x = "UMAP1", y = "UMAP2")# +
Here, we show the code for running the generalized additive mixed
model. It takes a significant amount of time to run, we provide data in
pseudotime_micro_l1.qs.
mat <- cao$data.object$getJointCountMatrix() %>%
.[match(names(sds_obj), rownames(.)), colSums(.) > 2e2] %>%
.[, !grepl(pattern = "RPL|RPS|MT-", colnames(.))]
mt <- mat %>%
as.matrix() %>%
as.data.frame() %>%
tibble::rownames_to_column() %>%
{message("Mutating"); mutate(.,
pseudotime = pseudotime[rowname],
condition = cao$data.object$getDatasetPerCell()[rowname] %>%
as.character() %>%
strsplit("_") %>%
sget(1),
sample = strsplit(rowname, "!!") %>%
sget(1))} %>%
.[complete.cases(.), ] %>%
{message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "condition", "sample"))} %$%
{message("Splitting"); split(., variable)}
res <- mt %>%
sccore::plapply(\(x) {
fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(condition)), random = ~(1 | sample), REML = F)
fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | sample), REML = F)
fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | sample), REML = F)
ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
residuals <- predict(fit_full$gam, se.fit = T)$fit %>%
unname()
r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>%
setNames(c("full", "reduced"))
out <- list(anova = ann,
residuals = residuals,
r.sq = r.sq)
return(out)
}
}, n.cores = 40, mc.preschedule = T, mc.cleanup = T, progress = F) %>%
.[!sapply(., is.null)]
qsave(res, "pseudotime_micro_l1.qs")
res <- qread("pseudotime_micro_l1.qs")
rsq.pseudo <- res %>%
sapply(\(gene) gene$r.sq[1]) %>%
sort(decreasing = T)
res.filter <- res[names(rsq.pseudo)[seq(50)] %>% strsplit(".", fixed = T) %>% sget(1)]
cm.merged <- cao$data.object$getJointCountMatrix() %>%
.[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)] %>%
.[rowSums(.) > 0, ]
## Predict smoothend expression
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7
scFit <- cm.merged %>%
Matrix::t() %>%
tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)
# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))
# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]
# Create heatmap
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))
Heatmap(Smooth,
col = col_fun,
cluster_columns=F,
cluster_rows=F,
show_column_names = F,
row_names_gp = grid::gpar(fontsize = 5))
cpc <- pseudotime %>%
names() %>%
strsplit("_") %>%
sget(1) %>%
factor()
res.filter %>%
lget("residuals") %>%
lapply(as.data.frame) %>%
lapply(setNames, "residuals") %>%
lapply(mutate, group = cpc, pseudotime = pseudotime) %>%
data.table::rbindlist(idcol = "gene") %>%
ggplot(aes(pseudotime, residuals, col = group)) +
geom_smooth() +
theme_bw() +
facet_wrap(~gene, ncol = 5, scales = "free")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Here, we use public data from Feleke et al.. We isolated microglia based on CSF1R expression and performed a quick annotation using AIF1 expression to identify activated microglia.
con.micro <- qread("feleke_micro.qs")
anno.micro <- getConosCluster(con.micro) %>%
factor(labels = c("Cluster1", "Activated", "Cluster2", "Cluster2"))
# We set sample groups manually as it can't be inferred from sample names
sample.groups <- c("CTRL", "CTRL", "DLB", "DLB", "DLB", "DLB", "DLB", "PDD", "PDD", "PD", "PDD", "PD", "PDD", "PD", "PDD", "PDD", "DLB", "PD", "PDD", "PD", "DLB", "PD", "PD", "CTRL", "CTRL", "CTRL", "CTRL", "CTRL") %>%
setNames(names(con.micro$samples))
con.clone <- con.micro$clone()
con.clone$samples <- con.micro$samples %>% .[which(sample.groups %in% c("CTRL", "PD"))]
sample.groups.clone <- sample.groups %>% .[. %in% c("CTRL", "PD")]
cao <- Cacoa$new(con.clone, sample.groups = sample.groups.clone, ref.level = "CTRL", target.level = "PD", cell.groups = anno.micro, n.cores = 32)
p <- cao$plotCellGroupSizes() + theme(line = element_blank())
p
cao$estimateCellLoadings()
p <- cao$plotCellLoadings()
## Warning: Removed 651 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 651 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
p
p <- con.clone$plotGraph(size = 0.3, groups = anno.micro, font.size = 5) + theme(line = element_blank())
p
p <- con.clone$plotGraph(gene = "AIF1", title = "AIF1", plot.na = F) + theme(line = element_blank())
p
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 20888932 1115.6 50841538 2715.3 50841538 2715.3
## Vcells 5959471866 45467.2 11676202160 89082.4 11458153615 87418.8
We provide the results output from scHLAcount in
scHLAcount.zip.
samples <- dir("scHLAcount/rydbirk/") %>% .[grepl(pattern = "_", .)]
cms <- lapply(samples, function(x) {
labels <- read.delim(paste0("scHLAcount/rydbirk/",x,"/labels.tsv"), header = F) %>% .[,1]
barcodes <- read.delim(paste0("scHLAcount/rydbirk/",x,"/barcodes.tsv"), header = F) %>%
.[,1] %>%
sapply(function(y) paste0(x,"one_",y))
mtx <- Matrix::readMM(paste0("scHLAcount/rydbirk/",x,"/count_matrix.mtx")) %>%
t() %>%
`dimnames<-`(list(barcodes, labels)) %>%
.[rowSums(.) > 0,] # Remove cells with no counts
tmp <- colnames(mtx) %>%
strsplit("*", T) %>%
sapply(`[[`, 1)
# If more than one allele per HLA type has been detected, sum per HLA type
if(any(tmp %>% table() %>% as.numeric() > 1)) {
cs <- colSums(mtx)
alleles <- unique(tmp)
mtx <- lapply(alleles, function(y) mtx[,tmp == y]) %>%
lapply(function(y) {
if(class(y) == "dgTMatrix") rowSums(y) else y
}) %>%
unlist() %>%
Matrix(nrow = nrow(mtx)) %>%
`rownames<-`(rownames(mtx))
}
colnames(mtx) <- unique(tmp)
return(mtx)
}) %>%
setNames(samples)
Calculations
# Cell-wise
cell.counts <- sapply(cms, rowSums) %>%
unname() %>%
unlist() %>%
`names<-`(., gsub("one_","!!",names(.)))
depth <- getConosDepth(con) %>%
.[match(names(cell.counts), names(.))] %>%
{data.frame(cell = names(.),
sample = names(.) %>% strsplit("!!", T) %>% sapply(`[[`, 1),
depth = unname(.))}
depth[is.na(depth)] <- 0
cell.group <- grepl.replace(names(cell.counts), patterns = c("CTRL","PD","MSA")) %>% as.factor()
mhc1.raw <- sapply(cms, function(x) {
tmp <- x[,colnames(x) %in% c("A","B","C")]
if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>%
unname() %>%
unlist() %>%
`names<-`(., gsub("one_","!!",names(.)))
mhc2.raw <- sapply(cms, function(x) {
tmp <- x[,!colnames(x) %in% c("A","B","C")]
if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>%
unname() %>%
unlist() %>%
`names<-`(., gsub("one_","!!",names(.)))
cell.df <- data.frame(group = cell.group,
counts = cell.counts,
mhc1 = mhc1.raw,
mhc2 = mhc2.raw,
depth = depth$depth) %>%
mutate(., anno = anno.major[match(rownames(.), names(anno.major))],
sample = rownames(.) %>% sapply(strsplit, "!!", T) %>% sapply(`[[`, 1)) %>%
mutate(type = grepl.replace(anno %>% as.character(), levels(anno), c("Brain","Brain","Brain","Peripheral","Peripheral","Peripheral","Brain","Brain","Brain","Brain"))) %>%
`rownames<-`(names(cell.counts)) %>%
filter(!is.na(anno))
Load Smajic data
samples <- dir("scHLAcount/smajic") %>% .[grepl(pattern = "SRR", .)]
cms <- lapply(samples, function(x) {
labels <- read.delim(paste0("scHLAcount/smajic/",x,"/labels.tsv"), header = F) %>% .[,1]
barcodes <- read.delim(paste0("scHLAcount/smajic/",x,"/barcodes.tsv"), header = F) %>%
.[,1] %>%
sapply(function(y) paste0(x,"_",y,"_1"))
mtx <- Matrix::readMM(paste0("scHLAcount/smajic/",x,"/count_matrix.mtx")) %>%
t() %>%
`dimnames<-`(list(barcodes, labels)) %>%
.[rowSums(.) > 0,] # Remove cells with no counts
tmp <- colnames(mtx) %>%
strsplit("*", T) %>%
sapply(`[[`, 1)
# If more than one allele per HLA type has been detected, sum per HLA type
if(any(tmp %>% table() %>% as.numeric() > 1)) {
cs <- colSums(mtx)
alleles <- unique(tmp)
mtx <- lapply(alleles, function(y) mtx[,tmp == y]) %>%
lapply(function(y) {
if(class(y) == "dgTMatrix") rowSums(y) else y
}) %>%
unlist() %>%
Matrix(nrow = nrow(mtx)) %>%
`rownames<-`(rownames(mtx))
}
colnames(mtx) <- unique(tmp)
return(mtx)
}) %>%
setNames(samples)
# Correct naming
name.df <- data.frame(samples = samples, ids = c(rep("PD",6) %>% mapply(function(x,y) paste0(x,y), x = ., y = c(1,1,2:5)),
rep("CTRL",6) %>% mapply(function(x,y) paste0(x,y), x = ., y = 1:6)))
anno <- readRDS(paste0("scHLAcount/smajic/anno.sma.rds")) %>%
setNames(., names(.) %>%
strsplit("_", T) %>%
lapply(function(x) {
if(grepl("C", x[[1]])) x[[1]] %<>% gsub("C","CTRL", .)
return(x)
}) %>%
sapply(function(x) paste0(x[[1]],"_",x[[2]])))
cms <- 1:12 %>%
lapply(function(x) {
rnames <- cms[[x]] %>%
rownames() %>%
names() %>%
sapply(function(y) paste0(name.df[x,2],"_",y)) %>%
unname()
rownames(cms[[x]]) <- rnames
return(cms[[x]])
}) %>%
setNames(names(cms))
Calculations
cell.df.rydbirk <- cell.df
# Cell-wise
cell.counts <- sapply(cms, rowSums) %>%
unname() %>%
unlist()
rem <- cell.counts %>%
names() %>%
table() %>%
.[. > 1] %>%
names()
cell.counts %<>% .[!names(.) %in% rem]
cell.group <- grepl.replace(names(cell.counts), patterns = c("CTRL","PD")) %>% as.factor()
mhc1.raw <- sapply(cms, function(x) {
tmp <- x[,colnames(x) %in% c("A","B","C")]
if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>%
unname() %>%
unlist() %>%
.[!names(.) %in% rem]
mhc2.raw <- sapply(cms, function(x) {
tmp <- x[,!colnames(x) %in% c("A","B","C")]
if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>%
unname() %>%
unlist() %>%
.[!names(.) %in% rem]
cell.df <- data.frame(group = cell.group,
counts = cell.counts,
mhc1 = mhc1.raw,
mhc2 = mhc2.raw) %>%
mutate(., anno = anno[match(rownames(.), names(anno))],
sample = rownames(.) %>% sapply(strsplit, "_", T) %>% sapply(`[[`, 1)) %>%
`rownames<-`(names(cell.counts)) %>%
filter(!is.na(anno))
Integrate with our data and plot
cell.df.int <- rbind(cell.df.rydbirk %>%
select(-c("depth","type")) %>%
mutate(origin = "Rydbirk"),
cell.df %>%
mutate(anno = anno %>% factor(labels = c("Astrocytes","Exc. neurons","Inh. neurons","Endothelial","Eppendymal","Exc. neurons","Inh. neurons","Inh. neurons","Microglia","Oligodendrocytes","OPCs","Pericytes")),
origin = "Smajic")) %>%
filter(! anno %in% c("Blood_immune","Endothelial","Eppendymal","Mural","Pericytes","Pericytes/endothelial","Immune")) %>%
filter(!is.na(anno)) %>%
mutate(anno = anno %>% factor(),
origin = origin %>% factor(),
anno = anno %>% unname() %>% factor(),
sample = sample %>% unname())
cell.anno.df <-
cell.df.int %>%
group_by(anno, sample, group, origin) %>%
summarise(mhc1 = mean(mhc1),
mhc2 = mean(mhc2),
counts = mean(counts),
cells = length(sample)) %>%
as.data.frame() %>%
select(-cells, -counts, -sample)
## `summarise()` has grouped output by 'anno', 'sample', 'group'. You can override
## using the `.groups` argument.
p.text1 <- data.frame(signif = c("n.s.", "n.s.", "n.s.", "n.s.", "n.s.", "0.040", "0.032", "n.s."),
mhc1 = 4.5,
anno = levels(cell.anno.df$anno))
p.text2 <- data.frame(signif = c("0.00057"),
mhc2 = 8.5,
anno = " Microglia")
cowplot::plot_grid(plotlist = list(
ggplot(cell.anno.df,
aes(anno,
mhc1)) +
geom_boxplot(outlier.shape = NA,
aes(fill = group)) +
geom_point(position = position_jitterdodge(jitter.width = 0.2),
aes(fill = group,
col = origin,
shape = group),
size = 2) +
scale_color_manual(values = c("black",
"grey50")) +
labs(x = "",
y = "Mean counts per sample",
title = "MHC-I counts",
fill = "",
col = "",
shape = " ") +
geom_text(data = p.text1, aes(label = signif)) +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
line = element_blank()),
ggplot(cell.anno.df %>% filter(anno == "Microglia") %>% mutate(anno = " Microglia"), aes(anno, mhc2)) +
geom_boxplot(outlier.shape = NA, aes(fill = group)) +
geom_point(position = position_jitterdodge(jitter.width = 0.2), aes(fill = group, col = origin, shape = group), size = 2) +
scale_color_manual(values = c("black","grey50")) +
labs(x = "", y = "", title = "MHC-II counts", fill = "", col = "", shape = " ") +
geom_text(data = p.text2, aes(label = signif)) +
theme_bw() +
scale_fill_manual(values = pal.major) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
line = element_blank())
), ncol = 2, rel_widths = c(2,0.9))
Kruskal-Wallis, asses differences per cell type
cell.anno.df %>%
group_by(anno) %>%
kruskal_test(mhc1 ~ group) %>%
data.frame()
## anno .y. n statistic df p method
## 1 Astrocytes mhc1 38 5.4099627 2 0.0669 Kruskal-Wallis
## 2 Microglia mhc1 37 3.5900071 2 0.1660 Kruskal-Wallis
## 3 Oligodendrocytes mhc1 38 4.0883941 2 0.1290 Kruskal-Wallis
## 4 PVMs mhc1 24 3.5403947 2 0.1700 Kruskal-Wallis
## 5 OPCs mhc1 38 0.9689609 2 0.6160 Kruskal-Wallis
## 6 Inh. neurons mhc1 31 6.4335165 2 0.0401 Kruskal-Wallis
## 7 Exc. neurons mhc1 22 6.8860651 2 0.0320 Kruskal-Wallis
## 8 MSN mhc1 21 4.9345432 2 0.0848 Kruskal-Wallis
cell.anno.df %>%
filter(anno == "Microglia") %>%
group_by(anno) %>%
kruskal_test(mhc2 ~ group) %>%
data.frame()
## anno .y. n statistic df p method
## 1 Microglia mhc2 37 14.92493 2 0.000574 Kruskal-Wallis
Wilcoxon, assess differences between our data and Smajic per condition per cell type
cell.anno.df %>%
filter(group != "MSA",
!anno %in% c("MSN","PVMs")) %>%
group_by(anno, group) %>%
wilcox_test(mhc1 ~ origin) %>%
data.frame()
## anno group .y. group1 group2 n1 n2 statistic p
## 1 Astrocytes CTRL mhc1 Rydbirk Smajic 10 6 27 0.792
## 2 Astrocytes PD mhc1 Rydbirk Smajic 11 5 26 0.913
## 3 Microglia CTRL mhc1 Rydbirk Smajic 9 6 21 0.529
## 4 Microglia PD mhc1 Rydbirk Smajic 11 5 30 0.827
## 5 Oligodendrocytes CTRL mhc1 Rydbirk Smajic 10 6 34 0.713
## 6 Oligodendrocytes PD mhc1 Rydbirk Smajic 11 5 26 0.913
## 7 OPCs CTRL mhc1 Rydbirk Smajic 10 6 15 0.118
## 8 OPCs PD mhc1 Rydbirk Smajic 11 5 31 0.743
## 9 Inh. neurons CTRL mhc1 Rydbirk Smajic 8 6 10 0.080
## 10 Inh. neurons PD mhc1 Rydbirk Smajic 7 5 23 0.432
## 11 Exc. neurons CTRL mhc1 Rydbirk Smajic 6 6 10 0.240
## 12 Exc. neurons PD mhc1 Rydbirk Smajic 3 5 5 0.549
cell.anno.df %>%
filter(group != "MSA",
anno == "Microglia") %>%
group_by(anno, group) %>%
wilcox_test(mhc2 ~ origin) %>%
data.frame()
## anno group .y. group1 group2 n1 n2 statistic p
## 1 Microglia CTRL mhc2 Rydbirk Smajic 9 6 19 0.368
## 2 Microglia PD mhc2 Rydbirk Smajic 11 5 40 0.180
We provide a curated list with MHC and cytokine genes. Please note, the order of clusters may change.
dat <- read.table("MHC_cytokines_curated.tsv", header = T, sep = "\t")
cm.merged <- con$getJointCountMatrix(raw = T) %>%
Matrix::t() %>%
.[, colnames(.) %in% names(anno.major)] %>%
.[rownames(.) %in% (dat$name[dat$class == "cytokine"] %>% gsub("-","",.)), ] %>%
.[rowSums(.) > 0,]
# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.major %>%
.[!is.na(.) & (. %in% c("Microglia", "PVMs"))] %>%
factor()
idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())
anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>%
{`names<-`(as.character(.), names(.))}
anno.final <- paste0(anno.donor,"!!",anno.subtype) %>%
`names<-`(anno.donor %>% names())
# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(),
groups = anno.final, min.cell.count = 1) %>%
t() %>%
apply(2, as, "integer")
cm.pseudo %<>%
{. + 1} %>%
DESeq2::DESeqDataSetFromMatrix(.,
colnames(.) %>%
strsplit("_") %>%
sget(1) %>%
data.frame() %>%
`dimnames<-`(list(colnames(cm.pseudo), "group")),
design = ~ group) %>%
DESeq2::estimateSizeFactors() %>%
DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
cm.pseudo %<>%
Matrix::t() %>%
scale() %>%
Matrix::t()
ord <- cm.pseudo %>%
colnames() %>%
{data.frame(id = .)} %>%
mutate(.,
ct = strsplit(id, "!!") %>%
sget(2),
condition = strsplit(id, "!!|_") %>%
sget(1)) %>%
arrange(ct, condition) %>%
pull(id)
cm.pseudo %<>%
.[, match(ord, colnames(.))]
cm.pseudo %<>%
as.data.frame() %>%
tibble::rownames_to_column(var = "gene") %>%
reshape2::melt(id.vars = c("gene")) %>%
mutate(variable = as.character(variable),
condition = strsplit(variable, "!!|_") %>%
sget(1),
ct = strsplit(variable, "!!") %>%
sget(2)) %>%
group_by(condition, ct, gene) %>%
summarize(m = mean(value)) %>%
ungroup() %>%
mutate(variable = paste(condition, ct, sep = "!!")) %>%
select(-condition, -ct) %>%
reshape2::dcast(gene ~ variable, value.var = "m") %>%
tibble::column_to_rownames(var = "gene")
## `summarise()` has grouped output by 'condition', 'ct'. You can override using
## the `.groups` argument.
tmp <- cm.pseudo %>%
colnames() %>%
strsplit("_|!!")
clusters <- tmp %>%
sget(2)
condition <- tmp %>%
sget(1)
ha <- ComplexHeatmap::HeatmapAnnotation(Annotation = clusters,
Condition = condition,
col = list(Annotation = c("Microglia" = unname(pal.major["Microglia"]),
"PVMs" = unname(pal.major["PVMs"])),
Condition = c("CTRL" = unname(pal.major["CTRL"]),
"MSA" = unname(pal.major["MSA"]),
"PD" = unname(pal.major["PD"]))
))
ComplexHeatmap::Heatmap(cm.pseudo,
name = "Expression",
show_column_names = F,
show_row_names = T,
cluster_columns = F,
show_column_dend = F,
cluster_rows = T,
top_annotation = ha,
show_row_dend = F,
column_split = colnames(cm.pseudo) %>% strsplit("!!|_") %>% sget(2) %>% unname() %>% factor(),
col=circlize::colorRamp2(c(-1, 1.2), c("white", "firebrick")),
row_km = 4)
## Warning: The input is a data frame-like object, convert it to a matrix.
c("FOSL2", "TCF4", "STAT1", "NR3C1", "ETV7", "THRB", "BPTF", "RELB", "CEBPD", "HIF1A", "ELF1", "CREM", "ELF2", "ZNF44", "CHD1", "POU2F1", "GTF2IRD1", "NFIA", "NFE2L2", "FOXP1", "FOXO3") %>%
clusterProfiler::enrichGO("org.Hs.eg.db", "SYMBOL", "BP") %>%
enrichplot::pairwise_termsim() %>%
enrichplot::treeplot() +
scale_fill_manual(values = brewer.pal(6, "Greys")[-1])
##
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
We provide the results from scDRS based on Nalls et al. GWAS
summary stats. For calculation of these, see
scDRS_PD.ipynb.
dat.scores <- qread("gwas/nalls/PD.full_score.qs")
# Load downstream analyses for cell types versus trait
dat.ct <- read.delim("gwas/nalls/downstream_celltype.tsv", header = F, sep = "\t") %$%
strsplit(V1, " ") %>%
unlist() %>%
gsub("\\", "", ., fixed = T) %>%
gsub("\n", "", ., fixed = T) %>%
.[!sapply(., \(x) x == "")] %>%
.[c(1:5,69:72, # Header
8:12,75:78, # Astro
15:19,81:84, # EN
21:25,86:89, # Immune
28:32,92:95, # IN
34:38,97:100, # MSN
40:44,102:105, # MIC
46:50,107:110, # OPCs
52:56,112:115, # OL
58:62,117:120, # PVMs
64:68,122:125 # Peri
)]
dat.sign <- dat.ct %>%
split(seq(9)) %>%
bind_rows() %>%
{`colnames<-`(.[-1, ], .[1, ])} %>%
as.data.frame()
dat.sign %<>% mutate(group = c("Astrocytes",
"Exc. neurons",
"Immune",
"Inh. neurons",
"MSN",
"Microglia",
"OPCs",
"Oligodendrocytes",
"PVMs",
"Pericytes/endothelial"))
dat.sign %<>%
filter(!group == "Unknown") %>%
mutate(p.adj = p.adjust(assoc_mcp, method = "fdr"),
hetero.adj = p.adjust(hetero_mcp, method = "fdr")) %>%
mutate(assoc_sign = sapply(assoc_mcp, \(x) if (x <= 0.001) "***" else if (x <= 0.01 && x > 0.001) "**" else if (x <= 0.05 && x > 0.01) "*" else ""),
hetero_sign = sapply(hetero_mcp, \(x) if (x <= 0.001) "###" else if (x <= 0.01 && x > 0.001) "##" else if (x <= 0.05 && x > 0.01) "#" else " ")) %>%
mutate(sign = paste0(assoc_sign,",",hetero_sign) %>% gsub(", ", "", .)) %>%
select(group, sign) %>%
dplyr::rename(type = group)
data.frame(cell = dat.scores$X,
score = dat.scores$norm_score,
type = anno.major[match(dat.scores$X,
names(anno.major))]) %>%
group_by(type) %>%
summarize(m = median(score)) %>%
filter(!is.na(type)) %>%
mutate(type = as.character(type)) %$%
arrange(., desc(m)) %>%
mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
ggplot(aes(type, m, fill = type)) +
geom_bar(stat = "identity") +
geom_text(aes(label = sign), vjust = -0.5) +
theme_bw() +
labs(y = "Mean score", x = "", title = "scDRS mean scores and associations") +
guides(fill = "none") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
line = element_blank()) +
ylim(c(-0.6, 2)) +
scale_fill_manual(values = pal.major) +
geom_hline(yintercept = 0)
dat.ct <- read.delim("gwas/nalls/downstream_celltype_microglia.tsv", header = F, sep = "\t") %$%
strsplit(V1, " ") %>%
unlist() %>%
gsub("\\", "", ., fixed = T) %>%
gsub("\n", "", ., fixed = T) %>%
gsub("group", "", .) %>%
.[!sapply(., \(x) x == "")] %>%
{c("group", .[seq(35)])} %>%
matrix(ncol = 6, byrow = T) %>%
as.data.frame() %>%
`colnames<-`(., .[1, ]) %>%
.[-1, ]
dat.sign <-
dat.ct %>%
filter(!group == "Unknown") %>%
mutate(p.adj = p.adjust(assoc_mcp, method = "fdr"),
hetero.adj = p.adjust(hetero_mcp, method = "fdr")) %>%
mutate(assoc_sign = sapply(assoc_mcp, \(x) if (x <= 0.001) "***" else if (x <= 0.01 && x > 0.001) "**" else if (x <= 0.05 && x > 0.01) "*" else ""),
hetero_sign = sapply(hetero_mcp, \(x) if (x <= 0.001) "###" else if (x <= 0.01 && x > 0.001) "##" else if (x <= 0.05 && x > 0.01) "#" else " ")) %>%
mutate(sign = paste0(assoc_sign,",",hetero_sign) %>% gsub(", ", "", .)) %>%
select(group, sign) %>%
dplyr::rename(type = group)
p <- data.frame(cell = dat.scores$X %>% gsub("one_", "!!", .),
score = dat.scores$norm_score,
type = anno.major[match(dat.scores$X,
names(anno.major))]) %>%
filter(cell %in% names(anno.micro)) %>%
mutate(type = factor(anno.micro[.$cell])) %>%
group_by(type) %>%
summarize(m = mean(score)) %>%
filter(!is.na(type)) %>%
mutate(type = as.character(type)) %>%
arrange(desc(m)) %>%
mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
mutate(type = factor(type, levels = type)) %>%
ggplot(aes(type, m, fill = type)) +
geom_bar(stat = "identity") +
geom_text(aes(label = sign), vjust = -0.5, nudge_y = -0.05) +
theme_bw() +
theme(line = element_blank()) +
labs(y = "Mean score", x = "", title = "scDRS for microglia") +
guides(fill = "none") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
ylim(c(0, 2)) +
scale_fill_manual(values = pal.major)
p
We provide the results from scDRS based on Chia et al. GWAS
summary stats. For calculation of these, see
scDRS_MSA.ipynb.
dat.scores <- qread("gwas/chia/MSA.full_score.qs")
# Load downstream analyses for cell types versus trait
dat.ct <- read.delim("gwas/chia/downstream_celltype.tsv", header = F, sep = "\t") %$%
strsplit(V1, " ") %>%
unlist() %>%
gsub("\\", "", ., fixed = T) %>%
gsub("\n", "", ., fixed = T) %>%
.[!sapply(., \(x) x == "")] %>%
.[c(1:5,69:72, # Header
8:12,75:78, # Astro
15:19,81:84, # EN
21:25,86:89, # Immune
28:32,92:95, # IN
34:38,97:100, # MSN
40:44,102:105, # MIC
46:50,107:110, # OPCs
52:56,112:115, # OL
58:62,117:120, # PVMs
64:68,122:125 # Peri
)]
dat.sign <- dat.ct %>%
split(seq(9)) %>%
bind_rows() %>%
{`colnames<-`(.[-1, ], .[1, ])} %>%
as.data.frame()
dat.sign %<>% mutate(group = c("Astrocytes",
"Exc. neurons",
"Immune",
"Inh. neurons",
"MSN",
"Microglia",
"OPCs",
"Oligodendrocytes",
"PVMs",
"Pericytes/endothelial"))
dat.sign %<>%
filter(!group == "Unknown") %>%
mutate(assoc_sign = sapply(assoc_mcp, \(x) if (x <= 0.001) "***" else if (x <= 0.01 && x > 0.001) "**" else if (x <= 0.05 && x > 0.01) "*" else ""),
hetero_sign = sapply(hetero_mcp, \(x) if (x <= 0.001) "###" else if (x <= 0.01 && x > 0.001) "##" else if (x <= 0.05 && x > 0.01) "#" else " ")) %>%
mutate(sign = paste0(assoc_sign,",",hetero_sign) %>% gsub(",", "", .)) %>%
select(group, sign) %>%
dplyr::rename(type = group)
data.frame(cell = dat.scores$X,
score = dat.scores$norm_score,
type = anno.major[match(dat.scores$X,
names(anno.major))]) %>%
group_by(type) %>%
summarize(m = median(score)) %>%
filter(!is.na(type)) %>%
mutate(type = as.character(type)) %$%
arrange(., desc(m)) %>%
mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
ggplot(aes(type, m, fill = type)) +
geom_bar(stat = "identity") +
geom_text(aes(label = sign), vjust = 1.5) +
theme_bw() +
labs(y = "Mean score", x = "", title = "scDRS mean scores and associations") +
guides(fill = "none") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
line = element_blank()) +
ylim(c(-0.5, 0.5)) +
scale_fill_manual(values = pal.major) +
geom_hline(yintercept = 0)
All subfigures were made in GraphPad Prism. Data are available in
Phagocytosis_assay_triplicates_raw_data_HH.tsv (Copenhagen
experiment) or HOUSTON.tsv (Houston experiment).
Not rerun
anno.major <- qread("anno_major.qs")
anno.micro <- qread("anno_micro_pvm.qs") %>%
renameAnnotation("Steady-state","MIC_steady-state") %>%
renameAnnotation("Intermediate1","MIC_intermediate1") %>%
renameAnnotation("Intermediate2","MIC_intermediate2") %>%
renameAnnotation("Activated","MIC_activated")
anno.glia <- c("anno_astro.qs",
"anno_oligo.qs",
"anno_opc.qs") %>%
lapply(qread) %>%
Reduce(c, .) %>%
factor() %>%
renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>%
renameAnnotation("Reactive_astrocytes", "AS_reactive") %>%
renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>%
renameAnnotation("Reactive_SCGZ", "OL_SGCZ")
anno.neurons <- qread("anno_neurons.qs")
# Minor final
anno.minor <- anno.major[!anno.major %in% c("Inh. neurons", "Exc. neurons", "PVMs", "Astrocytes", "Oligodendrocytes", "OPCs", "Microglia")] %>%
{factor(c(., anno.neurons, anno.glia, anno.micro))} %>%
factor(levels = sort(levels(.)))
# Major final
anno.major %<>% .[!names(.) %in% names(anno.neurons)] %>%
{factor(c(., anno.neurons))} %>%
collapseAnnotation("MSN") %>%
collapseAnnotation("GABAergic") %>%
collapseAnnotation("GLUergic") %>%
renameAnnotation("MSN", "Medium spiny neurons") %>%
renameAnnotation("GABAergic", "GABAergic neurons") %>%
renameAnnotation("GLUergic", "GLUergic neurons") %>%
factor(levels = sort(levels(.)))
con.major$n.cores <- 32 # Major object
markers.major <- con.major$getDifferentialGenes(groups = anno.major,
z.threshold = 1,
upregulated.only = T,
append.specificity.metrics = T,
append.auc = T)
markers.major %>%
lapply(filter, PAdj <= 0.05) %>%
bind_rows(.id = "Celltype") %>%
write.table("Table SX - Celltype markers, major annotation.tsv", sep = "\t", dec = ".", row.names = F)
Not rerun
res <- list(
Major = list(CTRLvsMSA = "cao_major_msa.qs",
CTRLvsPD = "cao_major_pd.qs",
PDvsMSA = "cao_major_dis.qs"),
Neurons = list(CTRLvsMSA = "cao_neurons_msa.qs",
CTRLvsPD = "cao_neurons_pd.qs",
PDvsMSA = "cao_neurons_dis.qs"),
Glial = list(CTRLvsMSA = "cao_oligo_astro_opc_msa.qs",
CTRLvsPD = "cao_oligo_astro_opc_pd.qs",
PDvsMSA = "cao_oligo_astro_opc_dis.qs"),
Micro_PVM = list(CTRLvsMSA = "cao_micro_pvm_msa.qs",
CTRLvsPD = "cao_micro_pvm_pd.qs",
PDvsMSA = "cao_micro_pvm_dis.qs")) %>%
lapply(\(celltype) sccore::plapply(celltype, \(comp) qread(comp, nthreads = 10)$test.results$coda, n.cores = 3)) %>%
lapply(lapply, \(x) data.frame(subtype = names(x$padj),
loadings.min = apply(x$loadings, 1, min),
loadings.median = apply(x$loadings, 1, median),
loadings.max = apply(x$loadings, 1, max),
padj = unname(x$padj))) %>%
lapply(bind_rows, .id = "Comparison") %>%
bind_rows(.id = "Cell type") %>%
`rownames<-`(NULL)
res %>%
write.table("Table SX - Loadings.tsv", sep = "\t", dec = ".", row.names = F)
Not rerun
cao <- Cacoa$new(data.object = con, # Major object
cell.groups = anno.major,
ref.level = "CTRL",
target.level = "DISEASE",
sample.groups = con$samples %>%
names() %>%
strsplit("_") %>%
sget(1) %>%
{ifelse(. == "CTRL", "CTRL", "DISEASE")} %>%
`names<-`(con$samples %>%
names()),
sample.groups.palette = c(c("yellow", RColorBrewer::brewer.pal(n = 6, "Accent")[5:6]) %>% setNames(c("CTRL","PD","MSA"))))
cao$sample.groups <- con$samples %>%
names() %>%
strsplit("_") %>%
sget(1) %>%
`names<-`(con$samples %>%
names())
res <- cao$plotCellGroupSizes(show.significance = TRUE,
filter.empty.cell.types = FALSE)$data %>%
group_by(group, variable) %>%
summarize(Min_proportion = min(value),
Mean_proportion = mean(value),
Max_proportion = max(value)) %>%
dplyr::rename(Condition = group,
Celltype = variable)
res %>%
write.table("Table SX - Proportions, major annotation.tsv", sep = "\t", dec = ".", row.names = F)
Not rerun
anno.major <- qread("anno_major.qs")
anno.micro <- qread("anno_micro_pvm.qs") %>%
renameAnnotation("Steady-state","MIC_steady-state") %>%
renameAnnotation("Intermediate1","MIC_intermediate1") %>%
renameAnnotation("Intermediate2","MIC_intermediate2") %>%
renameAnnotation("Activated","MIC_activated")
anno.glia <- c("anno_astro.qs",
"anno_oligo.qs",
"anno_opc.qs") %>%
lapply(qread) %>%
Reduce(c, .) %>%
factor() %>%
renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>%
renameAnnotation("Reactive_astrocytes", "AS_reactive") %>%
renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>%
renameAnnotation("Reactive_SCGZ", "OL_SGCZ")
anno.neurons <- qread("anno_neurons.qs")
# Minor final
anno.minor <- anno.major[!anno.major %in% c("Inh. neurons", "Exc. neurons", "PVMs", "Astrocytes", "Oligodendrocytes", "OPCs", "Microglia")] %>%
{factor(c(., anno.neurons, anno.glia, anno.micro))} %>%
factor(levels = sort(levels(.)))
# Major final
anno.major %<>% .[!names(.) %in% names(anno.neurons)] %>%
{factor(c(., anno.neurons))} %>%
collapseAnnotation("MSN") %>%
collapseAnnotation("GABAergic") %>%
collapseAnnotation("GLUergic") %>%
renameAnnotation("MSN", "Medium spiny neurons") %>%
renameAnnotation("GABAergic", "GABAergic neurons") %>%
renameAnnotation("GLUergic", "GLUergic neurons") %>%
factor(levels = sort(levels(.)))
markers.minor <- con.major$getDifferentialGenes(groups = anno.minor,
z.threshold = 1,
upregulated.only = T,
append.specificity.metrics = T,
append.auc = T)
markers.minor %>%
lapply(filter, PAdj <= 0.05) %>%
bind_rows(.id = "Celltype") %>%
write.table("Table SX - Celltype markers, minor annotation.tsv", sep = "\t", dec = ".", row.names = F)
Not rerun
cao <- Cacoa$new(data.object = con,
cell.groups = anno.minor,
ref.level = "CTRL",
target.level = "DISEASE",
sample.groups = con$samples %>%
names() %>%
strsplit("_") %>%
sget(1) %>%
{ifelse(. == "CTRL", "CTRL", "DISEASE")} %>%
`names<-`(con$samples %>%
names()),
sample.groups.palette = c(c("yellow", RColorBrewer::brewer.pal(n = 6, "Accent")[5:6]) %>% setNames(c("CTRL","PD","MSA"))))
cao$sample.groups <- con$samples %>%
names() %>%
strsplit("_") %>%
sget(1) %>%
`names<-`(con$samples %>%
names())
res <- cao$plotCellGroupSizes(show.significance = TRUE,
filter.empty.cell.types = FALSE)$data %>%
group_by(group, variable) %>%
summarize(Min_proportion = min(value),
Mean_proportion = mean(value),
Max_proportion = max(value)) %>%
dplyr::rename(Condition = group,
Celltype = variable)
res %>%
write.table("Table SX - Proportions, minor annotation.tsv", sep = "\t", dec = ".", row.names = F)
Not rerun
First, correct OPCs/COPs for age covariate according to scITD calculations
cao_msa <- qread("cao_opc_msa.qs", nthreads = 10)
cao_pd <- qread("cao_opc_pd.qs", nthreads = 10)
cao_dis <- qread("cao_opc_dis.qs", nthreads = 10)
metadata.all <- read.delim("metadata.tsv")
meta.df.msa <- metadata.all %>%
filter(sample %in% names(cao_msa$data.object$samples)) %>%
tibble::column_to_rownames("sample") %>%
select(age)
meta.df.pd <- metadata.all %>%
filter(sample %in% names(cao_pd$data.object$samples)) %>%
tibble::column_to_rownames("sample") %>%
select(age)
meta.df.dis <- metadata.all %>%
filter(sample %in% names(cao_dis$data.object$samples)) %>%
tibble::column_to_rownames("sample") %>%
select(age)
all.genes_msa <- cao_msa$data.object$samples %>%
lget("misc") %>%
lget("rawCounts") %>%
lapply(colnames) %>%
Reduce(union, .)
genes.to.omit_msa <- all.genes_msa %>%
.[grepl("MT-|RPL|RPS", .)]
all.genes_pd <- cao_pd$data.object$samples %>%
lget("misc") %>%
lget("rawCounts") %>%
lapply(colnames) %>%
Reduce(union, .)
genes.to.omit_pd <- all.genes_pd %>%
.[grepl("MT-|RPL|RPS", .)]
all.genes_dis <- cao_dis$data.object$samples %>%
lget("misc") %>%
lget("rawCounts") %>%
lapply(colnames) %>%
Reduce(union, .)
genes.to.omit_dis <- all.genes_dis %>%
.[grepl("MT-|RPL|RPS", .)]
cao_msa$estimateDEPerCellType(covariates = meta.df.msa, name = "de_age", genes.to.omit = genes.to.omit_msa)
cao_pd$estimateDEPerCellType(covariates = meta.df.pd, name = "de_age", genes.to.omit = genes.to.omit_pd)
cao_dis$estimateDEPerCellType(covariates = meta.df.dis, name = "de_age", genes.to.omit = genes.to.omit_dis)
cao_msa$estimateOntology("GSEA", name = "gsea_age", de.name = "de_age", org.db = org.Hs.eg.db::org.Hs.eg.db)
cao_pd$estimateOntology("GSEA", name = "gsea_age", de.name = "de_age", org.db = org.Hs.eg.db::org.Hs.eg.db)
cao_dis$estimateOntology("GSEA", name = "gsea_age", de.name = "de_age", org.db = org.Hs.eg.db::org.Hs.eg.db)
qsave(cao_msa, "cao_opc_msa.qs", nthreads = 10)
qsave(cao_pd, "cao_opc_pd.qs", nthreads = 10)
qsave(cao_dis, "cao_opc_dis.qs", nthreads = 10)
go.list <- dir("cacoa/", full.names = T)[-c(4:7, 15:18, 22, 26)] %>%
lapply(qread, nthreads = 10) %>%
lapply(purrr::pluck, "test.results") %>%
lapply(\(x) if ("gsea_age" %in% names(x)) x$gsea_age else x$gsea) %>% # To include age-corrected calculations for OPCs
setNames(dir("cacoa/")[-c(4:7, 15:18, 22, 26)])
go.list %>%
lget("res") %>%
lapply(lapply, lapply, slot, "result") %>%
lapply(lapply, data.table::rbindlist, idcol = "Category") %>%
lapply(data.table::rbindlist, idcol = "Celltype") %>%
data.table::rbindlist(idcol = "Comparison") %>%
mutate(Comparison = sapply(Comparison, \(comp) {
if (grepl("dis", comp)) "PD vs MSA" else if (grepl("msa", comp)) "CTRL vs MSA" else "CTRL vs PD"
})) %>%
filter(p.adjust <= 0.05) %>%
write.table("GSEA.csv", sep = ",", dec = ".", row.names = F)
Not rerun
de.list <- dir("cacoa/", full.names = T)[-c(4:7, 15:18, 22, 26)] %>%
lapply(qread, nthreads = 10) %>%
lapply(purrr::pluck, "test.results") %>%
lapply(\(x) if ("de_age" %in% names(x)) x$de_age else x$de) %>% # To include results from age-corrected calculations for OPCs
setNames(dir("cacoa/")[-c(4:7, 15:18, 22, 26)])
de.list %>%
lapply(lget, "res") %>%
lapply(data.table::rbindlist, idcol = "Celltype") %>%
lapply(select, Celltype, baseMean, log2FoldChange, lfcSE, stat, pvalue, padj, Gene, Z, Za, CellFrac, SampleFrac) %>%
data.table::rbindlist(idcol = "Comparison") %>%
mutate(Comparison = sapply(Comparison, \(comp) {
if (grepl("dis", comp)) "PD vs MSA" else if (grepl("msa", comp)) "CTRL vs MSA" else "CTRL vs PD"
})) %>%
filter(padj <= 0.05) %>%
write.table("DEGs.csv", sep = ",", dec = ".", row.names = F)
Not rerun. We need the res.filter object from calculation of smoothed heatmap for microglia activation trajectory.
go <- clusterProfiler::enrichGO(names(res.filter),
OrgDb = "org.Hs.eg.db",
keyType = "SYMBOL",
universe = colnames(con$getJointCountMatrix()))
go@result %>%
filter(pvalue <= 0.05) %>%
write.table("Microglia_pseudotime_activation-lineage_GO.tsv", sep = "\t", row.names = F)
Not rerun. We need the Smooth object for the microglia to PVM trajectory.
go.list <- list(repressed = rownames(Smooth)[1:which(rownames(Smooth) == "EPB41L2")],
induced = rownames(Smooth)[(which(rownames(Smooth) == "EPB41L2")+1):nrow(Smooth)]) %>%
lapply(clusterProfiler::enrichGO,
OrgDb = "org.Hs.eg.db",
keyType = "SYMBOL",
universe = colnames(con$getJointCountMatrix()))
go.list %>%
lapply(\(x) x@result) %>%
lapply(filter, pvalue <= 0.05) %>%
data.table::rbindlist(idcol = "geneset") %>%
write.table("Microglia_pseudotime_PVM-lineage_GO.tsv", sep = "\t", row.names = F)
Not rerun. We use the publicly available data from Rydbirk et al..
# Create universes
dat.all <- read.delim("CSF-Sv_Soluble-frac__Report.tsv")
universe.gene <- dat.all %>%
pull(PG.Genes) %>%
unique()
universe.prot <- dat.all %>%
pull(PG.UniProtIds)
# DEPs
dat <- read.delim("Soluble_fraction.txt")
# KEGG, MSA
kegg.res <- dat %>%
filter(Disease.group == "MSA") %>%
pull(UniProt.ID) %>%
clusterProfiler::enrichKEGG(keyType = "uniprot", pvalueCutoff = 0.2, universe = universe.prot, organism = "hsa")
# KEGG, PD
kegg.res.pd <- dat %>%
filter(Disease.group == "PD") %>%
pull(UniProt.ID) %>%
clusterProfiler::enrichKEGG(keyType = "uniprot", pvalueCutoff = 0.2, universe = universe.prot, organism = "hsa")
# Write table
list(MSA = kegg.res, PD = kegg.res.pd) %>%
lapply(getElement, "result") %>%
lapply(dplyr::select, Description, GeneRatio, BgRatio, pvalue, p.adjust, ID, geneID) %>%
bind_rows(.id = "Disease") %>%
write.table("TableS13_KEGG.tsv", sep = "\t", row.names = F)
Time to knit
Sys.time() - tt
## Time difference of 28.83827 mins
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.2 LTS
##
## Matrix products: default
## BLAS/LAPACK: /opt/intel/oneapi/mkl/2025.1/lib/libmkl_gf_lp64.so.2; LAPACK version 3.12.0
##
## Random number generation:
## RNG: L'Ecuyer-CMRG
## Normal: Inversion
## Sample: Rejection
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggraph_2.2.1 scITD_1.0.4
## [3] gamm4_0.2-7 mgcv_1.9-3
## [5] nlme_3.1-168 lme4_1.1-37
## [7] slingshot_2.16.0 TrajectoryUtils_1.16.1
## [9] SingleCellExperiment_1.30.1 SummarizedExperiment_1.38.1
## [11] Biobase_2.68.0 GenomicRanges_1.60.0
## [13] GenomeInfoDb_1.44.0 IRanges_2.42.0
## [15] S4Vectors_0.46.0 BiocGenerics_0.54.0
## [17] generics_0.1.4 MatrixGenerics_1.20.0
## [19] matrixStats_1.5.0 princurve_2.1.6
## [21] rstatix_0.7.2 stringr_1.5.1
## [23] ComplexHeatmap_2.24.0 circlize_0.4.16
## [25] ggrepel_0.9.6 CRMetrics_0.3.2
## [27] RColorBrewer_1.1-3 ggpmisc_0.6.1
## [29] ggpp_0.5.8-1 ggforce_0.4.2
## [31] reshape2_1.4.4 STRINGdb_2.20.0
## [33] ggpubr_0.6.0 cowplot_1.1.3
## [35] ggsci_3.2.0 ggplot2_3.5.2
## [37] qs_0.27.3 scHelper_0.0.5
## [39] sccore_1.0.5 cacoa_0.4.0
## [41] dplyr_1.1.4 magrittr_2.0.3
## [43] conos_1.5.2 igraph_2.1.4
## [45] Matrix_1.7-3
##
## loaded via a namespace (and not attached):
## [1] rTensor_1.4.8 R.methodsS3_1.8.2
## [3] dichromat_2.0-0.1 Rmisc_1.5.1
## [5] Biostrings_2.76.0 vctrs_0.6.5
## [7] ggtangle_0.0.6 RApiSerialize_0.1.4
## [9] digest_0.6.37 png_0.1-8
## [11] shape_1.4.6.1 registry_0.5-1
## [13] combinat_0.0-8 magick_2.8.6
## [15] MASS_7.3-65 foreach_1.5.2
## [17] httpuv_1.6.16 qvalue_2.40.0
## [19] withr_3.0.2 ggrastr_1.0.2
## [21] psych_2.5.3 xfun_0.52
## [23] ggfun_0.1.8 survival_3.8-3
## [25] memoise_2.0.1 ggbeeswarm_0.7.2
## [27] clusterProfiler_4.16.0 MatrixModels_0.5-4
## [29] gson_0.1.0 tidytree_0.4.6
## [31] GlobalOptions_0.1.2 gtools_3.9.5
## [33] pbapply_1.7-2 R.oo_1.27.1
## [35] Formula_1.2-5 KEGGREST_1.48.0
## [37] promises_1.3.2 httr_1.4.7
## [39] hash_2.2.6.3 stringfish_0.16.0
## [41] rstudioapi_0.17.1 UCSC.utils_1.4.0
## [43] DOSE_4.2.0 polyclip_1.10-7
## [45] ca_0.71.1 TSCAN_1.46.0
## [47] GenomeInfoDbData_1.2.14 quadprog_1.5-8
## [49] SparseArray_1.8.0 xtable_1.8-4
## [51] doParallel_1.0.17 evaluate_1.0.3
## [53] S4Arrays_1.8.0 irlba_2.3.5.1
## [55] colorspace_2.1-1 polynom_1.4-1
## [57] later_1.4.2 viridis_0.6.5
## [59] ggtree_3.16.0 lattice_0.22-7
## [61] SparseM_1.84-2 triebeard_0.4.1
## [63] pillar_1.10.2 iterators_1.0.14
## [65] caTools_1.18.3 compiler_4.5.1
## [67] stringi_1.8.7 TSP_1.2-4
## [69] minqa_1.2.8 plyr_1.8.9
## [71] drat_0.2.5 crayon_1.5.3
## [73] abind_1.4-8 gridGraphics_0.5-1
## [75] chron_2.3-62 locfit_1.5-9.12
## [77] graphlayouts_1.2.2 org.Hs.eg.db_3.21.0
## [79] bit_4.6.0 fastmatch_1.1-6
## [81] tradeSeq_1.22.0 codetools_0.2-20
## [83] bslib_0.9.0 GetoptLong_1.0.5
## [85] mime_0.13 splines_4.5.1
## [87] Rcpp_1.0.14 quantreg_6.1
## [89] sparseMatrixStats_1.20.0 pagoda2_1.0.12
## [91] brew_1.0-10 N2R_1.0.3
## [93] knitr_1.50 blob_1.2.4
## [95] clue_0.3-66 fs_1.6.6
## [97] confintr_1.0.2 DelayedMatrixStats_1.30.0
## [99] Rdpack_2.6.4 ggsignif_0.6.4
## [101] ggplotify_0.1.2 tibble_3.2.1
## [103] sqldf_0.4-11 statmod_1.5.0
## [105] tweenr_2.0.3 pkgconfig_2.0.3
## [107] tools_4.5.1 Rook_1.2
## [109] cachem_1.1.0 rbibutils_2.3
## [111] RSQLite_2.3.11 viridisLite_0.4.2
## [113] DBI_1.2.3 fastmap_1.2.0
## [115] rmarkdown_2.29 scales_1.4.0
## [117] pbmcapply_1.5.1 ica_1.0-3
## [119] broom_1.0.8 sass_0.4.10
## [121] patchwork_1.3.0 carData_3.0-5
## [123] farver_2.1.2 reformulas_0.4.1
## [125] tidygraph_1.3.1 gsubfn_0.7
## [127] yaml_2.3.10 cli_3.6.5
## [129] purrr_1.0.4 lifecycle_1.0.4
## [131] uwot_0.2.3 backports_1.5.0
## [133] BiocParallel_1.42.0 gtable_0.3.6
## [135] rjson_0.2.23 parallel_4.5.1
## [137] ape_5.8-1 SnowballC_0.7.1
## [139] limma_3.64.0 jsonlite_2.0.0
## [141] edgeR_4.6.2 seriation_1.5.7
## [143] bitops_1.0-9 bit64_4.6.0-1
## [145] Rtsne_0.17 yulab.utils_0.2.0
## [147] coda.base_1.0.0 proto_1.0.0
## [149] urltools_1.7.3 RcppParallel_5.1.10
## [151] jquerylib_0.1.4 GOSemSim_2.34.0
## [153] R.utils_2.13.0 lazyeval_0.2.2
## [155] shiny_1.10.0 htmltools_0.5.8.1
## [157] enrichplot_1.28.2 GO.db_3.21.0
## [159] glue_1.8.0 XVector_0.48.0
## [161] treeio_1.32.0 mclust_6.1.1
## [163] dunn.test_1.3.6 mnormt_2.1.1
## [165] RMTstat_0.3.1 gridExtra_2.3
## [167] boot_1.3-31 R6_2.6.1
## [169] tidyr_1.3.1 DESeq2_1.48.1
## [171] gplots_3.2.0 labeling_0.4.3
## [173] cluster_2.1.8.1 FSA_0.10.0
## [175] aplot_0.2.5 nloptr_2.2.1
## [177] DelayedArray_0.34.1 tidyselect_1.2.1
## [179] vipor_0.4.7 plotrix_3.8-4
## [181] dendsort_0.3.4 car_3.1-3
## [183] AnnotationDbi_1.70.0 leidenAlg_1.1.5
## [185] fastICA_1.2-7 KernSmooth_2.23-26
## [187] data.table_1.17.2 fgsea_1.34.0
## [189] rlang_1.1.6 lsa_0.73.3
## [191] ggnewscale_0.5.1 Cairo_1.6-2
## [193] beeswarm_0.4.0